# Event Selection, NuInt Edition

# TODO:

- update signal definition (KE vs. P)
- remake dfs with updated signal definition
- upper limit on mu P for signal definition
- check pion rejection using chi2_pion
- check pion rejection using daughter particles
- start - vertex cut
- genie mode breakdown
- optimization
- kinematic reconstruction plots
- comparison with uboone
- justify MCS quality cut
- get event displays of backgrounds
- get event displays of signal non-CCQEs
- cut ablation studies?

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import matplotlib as mpl
from os import path
import sys
import uproot
from tqdm import tqdm

# local imports
from variable_configs import *

sys.path.append('/exp/sbnd/app/users/munjung/xsec/wienersvd/cafpyana')
from analysis_village.numucc1p0pi.selection_definitions import *
from pyanalib.split_df_helpers import *
from pyanalib.stat_helpers import *
from makedf.constants import *
from pyanalib.variable_calculator import get_cc1p0pi_tki
from pyanalib.pandas_helpers import pad_column_name

plt.style.use("presentation.mplstyle")

# suppress warnings
import warnings
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

In [None]:
# Global switch: when True, the plotting cells below write their figures to disk.
save_fig = True
# NOTE(review): later cells build output paths from `save_fig_dir`, but its only
# visible definition is the commented-out line below. Presumably it is provided by
# the `variable_configs` star-import — confirm, otherwise those cells raise NameError.
# save_fig_dir = "/exp/sbnd/data/users/munjung/plots/event_selection/1mu1p"

In [None]:
# Viridis colormap plus a [0, 1] normalisation, kept as notebook-level objects
# (neither is referenced in the cells visible in this part of the notebook).
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=0.0, vmax=1.0)

In [None]:
def p_to_KE(p, mass):
    """Convert a momentum magnitude into kinetic energy.

    Uses E = sqrt(p^2 + m^2) in natural units (c = 1), so `p` and `mass`
    must be given in the same units; the result is E - m in those units.
    """
    total_energy = np.sqrt(p**2 + mass**2)
    kinetic_energy = total_energy - mass
    return kinetic_energy

# Spot checks: kinetic energies corresponding to a few momentum values for the
# muon and proton masses. These are bare expressions, so nothing is stored;
# in a notebook only the value of the last one is echoed.
p_to_KE(0.220, MUON_MASS)
p_to_KE(1, MUON_MASS)
p_to_KE(0.300, PROTON_MASS)
p_to_KE(1, PROTON_MASS)
# p_to_KE(0.070, PION_MASS)

# Load dataframes

- these should be dfs w/o any selection applied

In [None]:
## -- MC
# Read the "split" table of the MC file, query how many splits it holds and list its keys.
mc_file = "/exp/sbnd/data/users/munjung/xsec/2025B/MCP2025B_bnb_raw_daught.df"
# mc_file = "/exp/sbnd/data/users/munjung/xsec/2025B/MCP2025B_bnb_cosmics_hadded_50.df"
mc_split_df = pd.read_hdf(mc_file, key="split")
mc_n_split = get_n_split(mc_file)
print("mc_n_split: %d" %(mc_n_split))
print_keys(mc_file)

## -- Data
# NOTE(review): the markdown above asks for dataframes without any selection applied,
# while the active data file name ends in "_sel" — confirm this is the intended input.
# data_file = "/exp/sbnd/data/users/munjung/xsec/2025B/devdata_raw_daught.df"
data_file = "/exp/sbnd/data/users/munjung/xsec/2025B/DevData_sel.df"
data_split_df = pd.read_hdf(data_file, key="split")
data_n_split = get_n_split(data_file)
print("data_n_split: %d" %(data_n_split))
print_keys(data_file)

# Data header information lives in a separate file.
# NOTE(review): `data_hdr_df` is filled here from the "split" key and is re-assigned
# from the 'hdr' key in the concat cell; `data_n_split` is overwritten below with the
# header file's split count and printed under the same label as the data file's.
data_hdr_file = "/exp/sbnd/data/users/munjung/xsec/2025B/DevData_sel_hdr.df"
data_hdr_df = pd.read_hdf(data_hdr_file, key="split")
data_n_split = get_n_split(data_hdr_file)
print("data_n_split: %d" %(data_n_split))
print_keys(data_hdr_file)


In [None]:
# Load the per-key dataframes through load_dfs; `n_max_concat` is forwarded to it
# (presumably the maximum number of splits concatenated per key — confirm in pyanalib).
n_max_concat = 10

# --- MC: all four tables come from the same file
mc_keys2load = ['hdr', 'mcnu', 'evt', 'trk']
mc_dfs = load_dfs(mc_file, mc_keys2load, n_max_concat=n_max_concat)
mc_hdr_df, mc_mcnu_df, mc_evt_df, mc_trk_df = (mc_dfs[key] for key in mc_keys2load)

# --- Data: event and track tables from the selected-data file
data_keys2load = ['evt', 'trk']
data_dfs = load_dfs(data_file, data_keys2load, n_max_concat=n_max_concat)
data_evt_df, data_trk_df = (data_dfs[key] for key in data_keys2load)

# --- Data: header table from its dedicated file
# (data_keys2load / data_dfs are re-used, so afterwards they only describe the header load)
data_keys2load = ['hdr']
data_dfs = load_dfs(data_hdr_file, data_keys2load, n_max_concat=n_max_concat)
data_hdr_df = data_dfs[data_keys2load[0]]

In [None]:
# Display the data header dataframe for a quick visual check.
data_hdr_df

In [None]:
## total pot - MC is scaled to the data POT, in-time cosmics to the data gate count

# --- BNB data: total POT from the header table; data entries carry unit weight
data_tot_pot = data_hdr_df['pot'].sum()
# data_tot_pot = 4.60e+18
print(f"data_tot_pot: {data_tot_pot:.3e}")
# NOTE(review): this label is hard-coded and is not derived from data_tot_pot
pot_str = "7.37 $\\times 10^{18}$"
for data_df in (data_evt_df, data_trk_df):
    data_df["pot_weight"] = np.full(len(data_df), 1.0)
data_gates = data_hdr_df.nbnbinfo.sum()
print(f"data tot gates : {data_gates:.3e}")

# --- BNB MC: every MC entry gets the same data/MC POT ratio as weight
mc_tot_pot = mc_hdr_df['pot'].sum()
print(f"mc_tot_pot: {mc_tot_pot:.3e}")
mc_pot_scale = data_tot_pot / mc_tot_pot
print(f"mc_pot_scale: {mc_pot_scale:.3e}")

for mc_df in (mc_evt_df, mc_trk_df):
    mc_df["pot_weight"] = np.full(len(mc_df), mc_pot_scale)

# --- In-time data: header, slice ("slc") and track tables from one file
basepath = "/exp/sbnd/data/users/munjung/xsec/2025B"
fname = "trash/Data_intime_1000.df"
intime_file = path.join(basepath, fname)
intime_hdr_df = pd.read_hdf(intime_file, "hdr")
intime_evt_df = pd.read_hdf(intime_file, "slc")
intime_trk_df = pd.read_hdf(intime_file, "trk")

# Gates are summed only over header rows flagged first_in_subrun == 1
# (alternative kept from the original: sum noffbeambnb over all header rows).
is_first_in_subrun = intime_hdr_df['first_in_subrun'] == 1
intime_gates = intime_hdr_df[is_first_in_subrun]['noffbeambnb'].sum()
print(f"intime cosmics data gates: {intime_gates:.2e}")
# NOTE(review): f = 0.08 is an unexplained constant; the in-time sample is scaled
# to the fraction (1 - f) of the data gates.
f = 0.08
scale_intime_to_lightdata = (1 - f) * data_gates / intime_gates
print(f"goal scale: {scale_intime_to_lightdata:.2f}")
for intime_df in (intime_evt_df, intime_trk_df):
    intime_df["gates_weight"] = np.full(len(intime_df), scale_intime_to_lightdata)

In [None]:
# calculate TKI vars
P_mu_col = pad_column_name(("totp",), mc_evt_df.mu)
P_p_col = pad_column_name(("totp",), mc_evt_df.p)
mc_ret_tki = get_cc1p0pi_tki(mc_evt_df.mu, mc_evt_df.p, P_mu_col, P_p_col)

In [None]:
# Copy the TKI results onto the MC event dataframe, one column per variable.
# ('del_Lp' and 'R' were commented out originally and are still not copied.)
for tki_var in ('del_Tp', 'del_p', 'del_alpha', 'del_phi'):
    mc_evt_df[tki_var] = mc_ret_tki[tki_var]

# Util

In [None]:
# ---- shared building blocks (private)
def _true_momentum_below(genp, p_max):
    """Mask: magnitude of the (x, y, z) vector held by `genp` is below `p_max`."""
    return np.sqrt(genp.x**2 + genp.y**2 + genp.z**2) < p_max


def _IsNuInFV_NumuCC(df):
    """Mask: neutrino in the FV with truth pdg == 14 and iscc == 1."""
    return IsNuInFV(df) & (df.slc.truth.pdg == 14) & (df.slc.truth.iscc == 1)


# ---- neutrino vs. cosmic, in / out of the fiducial volume
def IsNu(df):
    """Mask: |pdg| is 14 or 12.

    NOTE(review): reads `df.pdg`, whereas the flavour checks below read
    `df.slc.truth.pdg` — confirm both columns exist on the event dataframe.
    """
    abs_pdg = np.abs(df.pdg)
    return (abs_pdg == 14) | (abs_pdg == 12)


def IsCosmic(df):
    """Mask: everything that is not selected by IsNu."""
    is_nu = IsNu(df)
    return ~is_nu


def IsNuOutFV(df):
    """Mask: neutrino whose truth position fails InFV."""
    truth_in_fv = InFV(df.slc.truth.position)
    return IsNu(df) & ~truth_in_fv


def IsNuInFV(df):
    """Mask: neutrino whose truth position passes InFV."""
    truth_in_fv = InFV(df.slc.truth.position)
    return IsNu(df) & truth_in_fv


def IsNuInFV_NuOther(df):
    """Mask: neutrino in the FV whose truth pdg is not 14."""
    return IsNuInFV(df) & (df.slc.truth.pdg != 14)


def IsNuInFV_NumuNC(df):
    """Mask: neutrino in the FV with truth pdg == 14 and iscc == 0."""
    return IsNuInFV(df) & (df.slc.truth.pdg == 14) & (df.slc.truth.iscc == 0)


# ---- numu CC in FV, breakdown in topology
# TODO: upper limit on muon energy!
def Is_1p0pi(df):
    """Mask: exactly one muon and one proton, no pions, both true momenta below 1."""
    counts_ok = (df.nmu_27MeV == 1) & (df.np_50MeV == 1) & (df.npi_30MeV == 0) & (df.npi0 == 0)
    return counts_ok & _true_momentum_below(df.mu.genp, 1) & _true_momentum_below(df.p.genp, 1)


def Is_Np0pi(df):
    """Mask: exactly one muon, more than one proton, no pions, true muon momentum below 1."""
    counts_ok = (df.nmu_27MeV == 1) & (df.np_50MeV > 1) & (df.npi_30MeV == 0) & (df.npi0 == 0)
    return counts_ok & _true_momentum_below(df.mu.genp, 1)


def IsNuInFV_NumuCC_Other(df):
    """Mask: numu CC in the FV that is neither 1p0pi nor Np0pi."""
    return _IsNuInFV_NumuCC(df) & ~(Is_1p0pi(df) | Is_Np0pi(df))


def IsNuInFV_NumuCC_Np0pi(df):
    """Mask: numu CC in the FV with the Np0pi topology."""
    return _IsNuInFV_NumuCC(df) & Is_Np0pi(df)


def IsNuInFV_NumuCC_1p0pi(df):
    """Mask: numu CC in the FV with the 1p0pi topology."""
    return _IsNuInFV_NumuCC(df) & Is_1p0pi(df)


# --- numu CC in FV, breakdown in genie mode
def IsNuInFV_NumuCC_QE(df):
    """Mask: numu CC in the FV with genie_mode == 0."""
    return _IsNuInFV_NumuCC(df) & (df.genie_mode == 0)


def IsNuInFV_NumuCC_MEC(df):
    """Mask: numu CC in the FV with genie_mode == 10."""
    return _IsNuInFV_NumuCC(df) & (df.genie_mode == 10)


def IsNuInFV_NumuCC_RES(df):
    """Mask: numu CC in the FV with genie_mode == 1."""
    return _IsNuInFV_NumuCC(df) & (df.genie_mode == 1)


def IsNuInFV_NumuCC_DIS(df):
    """Mask: numu CC in the FV with genie_mode == 2."""
    return _IsNuInFV_NumuCC(df) & (df.genie_mode == 2)


# A COH category (genie_mode == 3) was sketched in the original but left disabled;
# such events currently fall into the "OtherMode" category below.

def IsNuInFV_NumuCC_OtherMode(df):
    """Mask: numu CC in the FV whose genie_mode is none of 0, 1, 2, 10."""
    mode = df.genie_mode
    return _IsNuInFV_NumuCC(df) & (mode != 0) & (mode != 1) & (mode != 2) & (mode != 10)

# Plotter

In [None]:
# Legend labels and colours for each breakdown type. Every labels/colours pair is
# parallel, and its order matches the order of the `cuts` list that hist_plot and
# bar_plot build for that type.

# --- "nu_cosmics": cosmic / out-of-FV neutrino / in-FV neutrino
nu_cosmics_labels = ["Cosmic", r"Out-FV $\nu$", r"FV $\nu$"]
# nu_cosmics_colors = ["#ED5564", "#FFCE54", "#A0D568"]
nu_cosmics_colors = ["gray", "C0", "C1"]

# --- "topology": in-FV numu CC events split by final-state topology
topology_labels = ["Cosmic", r"Out-FV $\nu$", r"FV other $\nu$", r"FV $\nu_{\mu}$ NC",  
                  r"FV $\nu_{\mu}$ CC Other", r"FV $\nu_{\mu}$ CC Np0$\pi$", r"FV $\nu_{\mu}$ CC 1p0$\pi$"]
topology_colors = ["gray", "sienna", "crimson", "darkgreen", 
                  "coral", "darkslateblue", "mediumslateblue"]

# --- "genie": in-FV numu CC events split by genie_mode
genie_labels = ["Cosmic", r"Out-FV $\nu$", r"FV $\nu$ Other", r"FV $\nu_{\mu}$ NC",
                r"FV $\nu_{\mu}$ CC Other", r"FV $\nu_{\mu}$ CC DIS",
                r"FV $\nu_{\mu}$ CC RES", r"FV $\nu_{\mu}$ CC MEC",
                r"FV $\nu_{\mu}$ CC QE"]
genie_colors = ["gray", "sienna", "crimson", "darkgreen",
                "#BFB17C", "#D88A3B", "#2c7c94",
                "#390C1E", "#9b5580"]

In [None]:
def hist_plot(type,
              evtdf, vardf,
              vardf_data, var_intime,
              bins,
              plot_labels=("", "", ""),
              ratio=True,
              save_fig=False, save_name=None):
    """Draw a stacked MC + in-time-cosmics histogram with data overlaid.

    The MC is split into truth categories according to `type`; an optional
    lower panel shows the Data/MC ratio.

    Parameters
    ----------
    type : str
        Breakdown to use: "nu_cosmics", "topology" or "genie".
    evtdf : DataFrame
        MC event dataframe. Used to build the truth-category masks and to
        read the MC scale (first unique value of its `pot_weight` column).
    vardf : Series-like
        MC values of the plotted variable; must have the same length as
        `evtdf` and is indexed with the boolean masks built from it.
    vardf_data : array-like
        Data values of the plotted variable.
    var_intime : array-like
        In-time cosmic values; weighted with the notebook-level
        `scale_intime_to_lightdata`.
    bins : array-like
        Bin edges.
    plot_labels : sequence of str
        `plot_labels[0]` is the x label, `plot_labels[1]` the y label.
    ratio : bool
        If True, add the Data/MC ratio panel below the main panel.
    save_fig : bool
        If True, write the figure to `save_name`.
    save_name : str or None
        Output path used when `save_fig` is True.

    Returns
    -------
    dict
        {"cuts": list of the boolean category masks, in legend order}.

    Raises
    ------
    ValueError
        If `type` is not one of the supported breakdowns.
    """
    assert len(evtdf) == len(vardf)

    if type == "nu_cosmics":
        labels = nu_cosmics_labels
        colors = nu_cosmics_colors

        cut_cosmic = IsCosmic(evtdf)
        cut_nu_outfv = IsNuOutFV(evtdf)
        cut_nu_infv = IsNuInFV(evtdf)
        cuts = [cut_cosmic, cut_nu_outfv, cut_nu_infv]

    elif type == "topology":
        labels = topology_labels
        colors = topology_colors

        cut_cosmic = IsCosmic(evtdf)
        cut_nu_outfv = IsNuOutFV(evtdf)
        cut_nu_infv_nu_other = IsNuInFV_NuOther(evtdf)
        cut_nu_infv_numu_nc = IsNuInFV_NumuNC(evtdf)
        cut_nu_infv_numu_cc_other = IsNuInFV_NumuCC_Other(evtdf)
        cut_nu_infv_numu_cc_np0pi = IsNuInFV_NumuCC_Np0pi(evtdf)
        cut_nu_infv_numu_cc_1p0pi = IsNuInFV_NumuCC_1p0pi(evtdf)
        cuts = [cut_cosmic, cut_nu_outfv, cut_nu_infv_nu_other, cut_nu_infv_numu_nc,
                cut_nu_infv_numu_cc_other, cut_nu_infv_numu_cc_np0pi, cut_nu_infv_numu_cc_1p0pi]

    elif type == "genie":
        labels = genie_labels
        colors = genie_colors

        cut_cosmic = IsCosmic(evtdf)
        cut_nu_outfv = IsNuOutFV(evtdf)
        cut_nu_infv_nu_other = IsNuInFV_NuOther(evtdf)
        cut_nu_infv_numu_nc = IsNuInFV_NumuNC(evtdf)
        print("numu NC", cut_nu_infv_numu_nc.sum())
        # cut_nu_infv_numu_coh = IsNuInFV_NumuCC_COH(evtdf)
        cut_nu_infv_numu_othermode = IsNuInFV_NumuCC_OtherMode(evtdf)
        cut_nu_infv_numu_cc_dis = IsNuInFV_NumuCC_DIS(evtdf)
        cut_nu_infv_numu_cc_res = IsNuInFV_NumuCC_RES(evtdf)
        cut_nu_infv_numu_cc_me = IsNuInFV_NumuCC_MEC(evtdf)
        cut_nu_infv_numu_cc_qe = IsNuInFV_NumuCC_QE(evtdf)
        cuts = [cut_cosmic, cut_nu_outfv, cut_nu_infv_nu_other, cut_nu_infv_numu_nc,
                cut_nu_infv_numu_othermode, cut_nu_infv_numu_cc_dis, cut_nu_infv_numu_cc_res,
                cut_nu_infv_numu_cc_me, cut_nu_infv_numu_cc_qe]

    else:
        raise ValueError("Invalid type: %s, please choose between [nu_cosmics, topology, or genie]" % type)

    # --- Plot template
    if ratio:
        fig, axs = plt.subplots(2, 1, figsize=(8.5, 8),
                                sharex=True, gridspec_kw={'height_ratios': [3, 1]})
        fig.subplots_adjust(hspace=0.05)
        ax = axs[0]
        ax_r = axs[1]
    else:
        fig, ax = plt.subplots()

    # accept any sequence of edges (a plain list would break the arithmetic below)
    bins = np.asarray(bins)
    bin_centers = 0.5 * (bins[:-1] + bins[1:])

    # --- Data
    total_data, bins = np.histogram(vardf_data, bins=bins)
    data_err = np.sqrt(total_data)
    # Asymmetric (low, high) data errors. Needed for the y-range below in both
    # layouts, so they are computed regardless of `ratio`.
    data_eylow, data_eyhigh = return_data_stat_err(total_data)
    ax.errorbar(bin_centers, total_data, yerr=data_err,
                fmt='o', color='black')  # error bars

    # --- MC
    # collect all MC + intime; the in-time sample goes first (bottom of the stack)
    var_categ = [var_intime] + [vardf[i] for i in cuts]
    scale_mc = evtdf.pot_weight.unique()[0]
    # TODO:
    scale_intime = scale_intime_to_lightdata
    weights_categ = [scale_intime*np.ones_like(var_intime)] + [scale_mc*np.ones_like(vardf[i]) for i in cuts]
    # new lists are built here, so the module-level label/colour lists stay untouched
    colors = ["black"] + colors
    labels = ["Cosmic\n(In-time)"] + labels

    mc_stack, _, _ = ax.hist(var_categ,
                             bins=bins,
                             weights=weights_categ,
                             stacked=True,
                             color=colors,
                             label=labels,
                             edgecolor='none',
                             linewidth=0,
                             density=False,
                             histtype='stepfilled')

    # ---- MC stat err: per bin, sqrt of the summed squared weights over all categories
    each_mc_hist_data = []
    each_mc_hist_err2 = []  # sum of squared weights for error

    for data, w in zip(var_categ, weights_categ):
        hist_vals, _ = np.histogram(data, bins=bins, weights=w)
        hist_err2, _ = np.histogram(data, bins=bins, weights=np.square(w))
        each_mc_hist_data.append(hist_vals)
        each_mc_hist_err2.append(hist_err2)

    total_mc = np.sum(each_mc_hist_data, axis=0)
    total_mc_err2 = np.sum(each_mc_hist_err2, axis=0)
    mc_stat_err = np.sqrt(total_mc_err2)

    ax.bar(
        bin_centers,
        2 * mc_stat_err,
        width=np.diff(bins),
        bottom=total_mc - mc_stat_err,
        facecolor='none',             # transparent fill
        edgecolor='dimgray',          # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ax.set_xlim(bins[0], bins[-1])
    if not ratio:
        # Without a ratio panel the x label belongs on the main axis
        # (there is no `ax_r` in this layout).
        ax.set_xlabel(plot_labels[0])
    ax.set_ylabel(plot_labels[1])

    if ratio:
        # Empty MC bins give 0/0 or x/0; silence the warnings, NaNs are replaced below.
        with np.errstate(divide='ignore', invalid='ignore'):
            # MC stat err
            mc_stat_err_ratio = mc_stat_err / total_mc
            mc_content_ratio = total_mc / total_mc
            # data/MC ratio and its errors
            data_ratio = total_data / total_mc
            data_ratio_eylow = data_eylow / total_mc
            data_ratio_eyhigh = data_eyhigh / total_mc

        mc_stat_err_ratio = np.nan_to_num(mc_stat_err_ratio, nan=0.)
        mc_content_ratio = np.nan_to_num(mc_content_ratio, nan=-999.)
        ax_r.bar(
            bin_centers,
            2*mc_stat_err_ratio,
            width=np.diff(bins),
            bottom=mc_content_ratio - mc_stat_err_ratio,
            facecolor='none',             # transparent fill
            edgecolor='dimgray',          # outline color of the hatching
            hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
            linewidth=0.0,
            label='MC Stat. Unc.'
        )

        # NaN ratios are pushed far below the visible range, their errors set to 0
        data_ratio = np.nan_to_num(data_ratio, nan=-999.)
        data_ratio_eylow = np.nan_to_num(data_ratio_eylow, nan=0.)
        data_ratio_eyhigh = np.nan_to_num(data_ratio_eyhigh, nan=0.)

        ax_r.errorbar(bin_centers, data_ratio,
                      yerr=np.vstack((data_ratio_eylow, data_ratio_eyhigh)),
                      fmt='o', color='black')

        # if highest value is greater than 2.0, set ylim to 2.0
        if np.max(data_ratio) > 2.0:
            ax_r.set_ylim(0., 2.0)

        ax_r.axhline(1.0, color='red', linestyle='--', linewidth=1)

        ax_r.grid(True)
        ax_r.minorticks_on()
        ax_r.grid(which='minor', linestyle=':', linewidth=0.5, color='gray', alpha=0.5)

        ax_r.set_xlabel(plot_labels[0])
        ax_r.set_ylabel("Data/MC")

    # --- Legend
    # mc_stack holds cumulative histograms; successive differences of their sums
    # give each category's yield, quoted as a percentage of the total.
    accum_sum = [np.sum(data) for data in mc_stack]
    accum_sum = [0.] + accum_sum
    total_sum = accum_sum[-1]
    individual_sums = [accum_sum[i + 1] - accum_sum[i] for i in range(len(accum_sum) - 1)]
    fractions = [(count / total_sum) * 100 for count in individual_sums]
    legend_labels = [f"{label} ({frac:.1f}%)" for label, frac in zip(labels[::-1], fractions[::-1])]
    legend_labels += ["Data", "MC Stat. Unc."]
    ax.legend(legend_labels,
              loc='upper left',
              fontsize=10,
              frameon=False,
              ncol=3,
              bbox_to_anchor=(0.02, 0.98))
    # leave headroom above the highest data point for the legend
    max_data_with_err = np.max(total_data + data_eyhigh)
    ax.set_ylim(0., 1.4 * max_data_with_err)

    if save_fig:
        plt.savefig(save_name, bbox_inches='tight') #, dpi=300)
    plt.show()

    # bolder figure lines?
    # ax.tick_params(width=2, length=10)
    # for spine in ax.spines.values():
    #     spine.set_linewidth(2)

    ret_dict = {"cuts": cuts}
    return ret_dict

In [None]:
def bar_plot(type,
             evtdf,
             plot_labels=("", "", ""),
             save_fig=False, save_name=None): #, scale, stage):
    """Draw a horizontal bar chart of scaled event counts per truth category.

    Parameters
    ----------
    type : str
        Breakdown to use: "nu_cosmics", "topology" or "genie".
    evtdf : DataFrame
        MC event dataframe. Used to build the truth-category masks; the
        scale applied to every count is the first unique value of its
        `pot_weight` column.
    plot_labels : sequence of str
        Only `plot_labels[0]` is used, as the x label.
    save_fig : bool
        If True, write the figure to `save_name`.
    save_name : str or None
        Output path used when `save_fig` is True.

    Returns
    -------
    dict
        {"cuts": list of boolean category masks,
         "perc_list": percentage of the total for each category, same order}.

    Raises
    ------
    ValueError
        If `type` is not one of the supported breakdowns.
    """
    if type == "nu_cosmics":
        labels = nu_cosmics_labels
        colors = nu_cosmics_colors

        cuts = [IsCosmic(evtdf), IsNuOutFV(evtdf), IsNuInFV(evtdf)]

    elif type == "topology":
        labels = topology_labels
        colors = topology_colors

        cuts = [IsCosmic(evtdf), IsNuOutFV(evtdf),
                IsNuInFV_NuOther(evtdf), IsNuInFV_NumuNC(evtdf),
                IsNuInFV_NumuCC_Other(evtdf), IsNuInFV_NumuCC_Np0pi(evtdf),
                IsNuInFV_NumuCC_1p0pi(evtdf)]

    elif type == "genie":
        labels = genie_labels
        colors = genie_colors

        # a COH category is not split out; it is part of "OtherMode"
        cuts = [IsCosmic(evtdf), IsNuOutFV(evtdf),
                IsNuInFV_NuOther(evtdf), IsNuInFV_NumuNC(evtdf),
                IsNuInFV_NumuCC_OtherMode(evtdf), IsNuInFV_NumuCC_DIS(evtdf),
                IsNuInFV_NumuCC_RES(evtdf), IsNuInFV_NumuCC_MEC(evtdf),
                IsNuInFV_NumuCC_QE(evtdf)]

    else:
        raise ValueError("Invalid type: %s, please choose between [nu_cosmics, topology, or genie]" % type)

    # figure height follows the number of categories instead of a hard-coded count
    ncateg = len(labels)
    fig, ax = plt.subplots(figsize=(6, ncateg*0.6))

    scale = evtdf.pot_weight.unique()[0]
    size = [scale*len(evtdf[i]) for i in cuts]

    # NOTE: the categories are not checked for overlap or for full coverage of evtdf;
    # the percentages below are relative to the sum of the category counts.

    bars = ax.barh(labels, size, align='center', color=colors)
    tot_count = np.array(size).sum()

    perc_list = []
    for bar in bars:
        width = bar.get_width()
        label_y_pos = bar.get_y() + bar.get_height() / 2
        perc = 100*(width+0.)/(tot_count+0.)
        # annotate each bar with its share of the total, just right of the bar end
        ax.text(width+1, label_y_pos, s=f"{perc:.1f}%", va='center')
        perc_list.append(perc)

    ax.set_xlabel(plot_labels[0])
    ax.set_xlim(0, 1.12 * np.max(size))

    if save_fig:
        fig.savefig(save_name, bbox_inches="tight")

    ret_dict = {"cuts": cuts,
                "perc_list": perc_list}
    return ret_dict

In [None]:
# Axis-label triplets for the plotting helpers: bar_plot reads entry 0 as the
# x label, hist_plot reads entry 0 as the x label and entry 1 as the y label.
events_axis_label = f"Events (POT={pot_str})"
plot_labels_bar = [events_axis_label, "", ""]
plot_labels_hist = ["", events_axis_label, ""]

# Collect Through Selection for Summary Plots

In [None]:
# Per-stage book-keeping, keyed by the stage name of each selection step.
# MC / data / in-time event dataframes (for the efficiency plot):
df_dict, df_dict_data, df_dict_intime = {}, {}, {}
# Category percentages (for the topology and genie break-down plots):
perc_dict_topo, perc_dict_genie = {}, {}

# Cosmic Rejection with Slice Variables

In [None]:
# Stage "allreco": the full MC / data / in-time event dataframes, no cut applied in this cell.
stage_key = "allreco"

# one bar plot per breakdown type, each saved under its own file name
bar_type = "nu_cosmics"
save_name = f"{save_fig_dir}/bar_plot-{bar_type}-{stage_key}.png"
ret_nu_cosmics = bar_plot(
    type=bar_type, evtdf=mc_evt_df,
    plot_labels=plot_labels_bar,
    save_fig=save_fig, save_name=save_name,
)

bar_type = "topology"
save_name = f"{save_fig_dir}/bar_plot-{bar_type}-{stage_key}.png"
ret_topo = bar_plot(
    type=bar_type, evtdf=mc_evt_df,
    plot_labels=plot_labels_bar,
    save_fig=save_fig, save_name=save_name,
)

bar_type = "genie"
save_name = f"{save_fig_dir}/bar_plot-{bar_type}-{stage_key}.png"
ret_genie = bar_plot(
    type=bar_type, evtdf=mc_evt_df,
    plot_labels=plot_labels_bar,
    save_fig=save_fig, save_name=save_name,
)

# record this stage for the summary plots
df_dict[stage_key] = mc_evt_df
df_dict_data[stage_key] = data_evt_df
df_dict_intime[stage_key] = intime_evt_df
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

In [None]:
# Sequential slice-level cuts. Each stage filters the *_precut dataframes of the
# previous stage, draws the three break-down bar plots for MC and records the
# surviving dataframes / percentages under `stage_key` for the summary plots.

# ---- is_clear_cosmic cut: keep slices with is_clear_cosmic == 0
stage_key = "is_clear_cosmic"
mc_evt_df_precut = mc_evt_df[mc_evt_df.slc.is_clear_cosmic == 0]
data_evt_df_precut = data_evt_df[data_evt_df.slc.is_clear_cosmic == 0]
intime_evt_df_precut = intime_evt_df[intime_evt_df.slc.is_clear_cosmic == 0]

bar_type = "nu_cosmics"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_nu_cosmics = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_precut
df_dict_data[stage_key] = data_evt_df_precut
df_dict_intime[stage_key] = intime_evt_df_precut
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# ---- vertex cut: keep slices whose reconstructed vertex passes InFV
stage_key = "vertex_in_fv"
mc_evt_df_precut = mc_evt_df_precut[InFV(mc_evt_df_precut.slc.vertex)]
data_evt_df_precut = data_evt_df_precut[InFV(data_evt_df_precut.slc.vertex)]
intime_evt_df_precut = intime_evt_df_precut[InFV(intime_evt_df_precut.slc.vertex)]

# bar_type must be reset here: it still holds "genie" from the previous stage, which
# made this call draw (and save) a genie plot instead of the nu/cosmics one.
bar_type = "nu_cosmics"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_nu_cosmics = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_precut
df_dict_data[stage_key] = data_evt_df_precut
df_dict_intime[stage_key] = intime_evt_df_precut
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]


# ---- nu-score cut: keep slices with nu_score above the threshold
stage_key = "nu_score"
nu_score_th = 0.5
mc_evt_df_precut = mc_evt_df_precut[mc_evt_df_precut.slc.nu_score > nu_score_th]
data_evt_df_precut = data_evt_df_precut[data_evt_df_precut.slc.nu_score > nu_score_th]
intime_evt_df_precut = intime_evt_df_precut[intime_evt_df_precut.slc.nu_score > nu_score_th]

# same reset as above, otherwise the nu/cosmics plot is replaced by a genie plot
bar_type = "nu_cosmics"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_nu_cosmics = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_precut,
                          plot_labels=plot_labels_bar,
                          save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_precut
df_dict_data[stage_key] = data_evt_df_precut
df_dict_intime[stage_key] = intime_evt_df_precut
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]


## Data vs. MC Comparison

# Two-prong Selection

- two PFPs
- both contained
- both with trackscore > 0.5
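- both start within 1.2 cm of the slice vertex (|start position - vertex| < `dist_th` in the code below)
- one at least 50 cm (not applied at the moment: this cut is commented out in the code below)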

In [None]:
def match_trkdf_to_slcdf(trkdf, slcdf):
    """Select the rows of `trkdf` that belong to the entries of `slcdf`.

    trkdf: df to match; its index level at position 3 is temporarily moved
           into a column so the remaining index can be looked up in slcdf's.
    slcdf: df to match to; its index provides the keys to keep.

    Returns the selected rows with trkdf's original index levels restored.
    """
    original_levels = trkdf.index.names
    slice_indexed = trkdf.reset_index(level=[3])
    kept_rows = slice_indexed.loc[slcdf.index]
    matched_trkdf = kept_rows.reset_index().set_index(original_levels)
    return matched_trkdf


In [None]:
def mag3d(df1, df2):
    """Euclidean distance between two objects exposing x, y and z components."""
    dx = df1.x - df2.x
    dy = df1.y - df2.y
    dz = df1.z - df2.z
    return np.sqrt(dx**2 + dy**2 + dz**2)

In [None]:
# Restrict the track dataframes to the slices that survived the slice-level cuts.
mc_trk_df_precut = match_trkdf_to_slcdf(mc_trk_df, mc_evt_df_precut)
data_trk_df_precut = match_trkdf_to_slcdf(data_trk_df, data_evt_df_precut)
intime_trk_df_precut = match_trkdf_to_slcdf(intime_trk_df, intime_evt_df_precut)

# TODO: what's pfp.trk.producer = 4294967295? excluding these pfps for now
# NOTE(review): 4294967295 equals 2**32 - 1, i.e. it looks like an unsigned-int
# "unset" sentinel — confirm.
mc_trk_df_precut = mc_trk_df_precut[mc_trk_df_precut.pfp.trk.producer != 4294967295]
data_trk_df_precut = data_trk_df_precut[data_trk_df_precut.pfp.trk.producer != 4294967295]
intime_trk_df_precut = intime_trk_df_precut[intime_trk_df_precut.pfp.trk.producer != 4294967295]

# number of PFPs per group of the first three index levels (one group per slice)
mc_npfps = mc_trk_df_precut.pfp.id.groupby(level=[0,1,2]).count()
data_npfps = data_trk_df_precut.pfp.id.groupby(level=[0,1,2]).count()
intime_npfps = intime_trk_df_precut.pfp.id.groupby(level=[0,1,2]).count()
# every surviving slice is expected to keep at least one PFP after the producer filter
assert len(mc_evt_df_precut) == len(mc_npfps)
assert len(data_evt_df_precut) == len(data_npfps)
assert len(intime_evt_df_precut) == len(intime_npfps)

# sort by length (descending), so that nth(0) / nth(1) below pick the longest and
# second-longest track of each slice
# TODO: check length against start/end position
mc_trk_df_precut = mc_trk_df_precut.sort_values(by=('pfp','trk','len'), ascending=False)
data_trk_df_precut = data_trk_df_precut.sort_values(by=('pfp','trk','len'), ascending=False)
intime_trk_df_precut = intime_trk_df_precut.sort_values(by=('pfp','trk','len'), ascending=False)
# track1 = longest track, track2 = second-longest track of each slice; index level 3
# is moved into a column so both share the slice-level index
mc_track1 = mc_trk_df_precut.groupby(level=[0,1,2]).nth(0).reset_index(level=[3])
mc_track2 = mc_trk_df_precut.groupby(level=[0,1,2]).nth(1).reset_index(level=[3])
data_track1 = data_trk_df_precut.groupby(level=[0,1,2]).nth(0).reset_index(level=[3])
data_track2 = data_trk_df_precut.groupby(level=[0,1,2]).nth(1).reset_index(level=[3])
intime_track1 = intime_trk_df_precut.groupby(level=[0,1,2]).nth(0).reset_index(level=[3])
intime_track2 = intime_trk_df_precut.groupby(level=[0,1,2]).nth(1).reset_index(level=[3])


# Two-prong selection. The masks are accumulated with `&` stage by stage and are
# always applied to the *_precut event dataframes; after every stage the topology
# and genie bar plots are drawn and the stage is recorded for the summary plots.

# 2-prong cut: exactly two PFPs in the slice
stage_key = "2prong"
mc_mask = (mc_npfps == 2)
mc_evt_df_2prong = mc_evt_df_precut[mc_mask]
data_mask = (data_npfps == 2)
data_evt_df_2prong = data_evt_df_precut[data_mask]
intime_mask = (intime_npfps == 2)
intime_evt_df_2prong = intime_evt_df_precut[intime_mask]

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_2prong
df_dict_data[stage_key] = data_evt_df_2prong
df_dict_intime[stage_key] = intime_evt_df_2prong
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# both tracks contained: the end point of each track must pass InFV
# (the same requirement on the start points is currently disabled)
stage_key = "2prong-contained"
mc_mask = mc_mask & InFV(mc_track1.pfp.trk.end) & InFV(mc_track2.pfp.trk.end)
# mask = mask & InFV(mc_track1.pfp.trk.start) & InFV(mc_track2.pfp.trk.start)
mc_evt_df_2prong = mc_evt_df_precut[mc_mask]
data_mask = data_mask & InFV(data_track1.pfp.trk.end) & InFV(data_track2.pfp.trk.end)
data_evt_df_2prong = data_evt_df_precut[data_mask]
intime_mask = intime_mask & InFV(intime_track1.pfp.trk.end) & InFV(intime_track2.pfp.trk.end)
intime_evt_df_2prong = intime_evt_df_precut[intime_mask]

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_2prong
df_dict_data[stage_key] = data_evt_df_2prong
df_dict_intime[stage_key] = intime_evt_df_2prong
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# both tracks have trackScore > trackscore_th (0.5)
stage_key = "2prong-trackscore"
trackscore_th = 0.5
mc_mask = mc_mask & (mc_track1.pfp.trackScore > trackscore_th) & (mc_track2.pfp.trackScore > trackscore_th)
mc_evt_df_2prong = mc_evt_df_precut[mc_mask]
data_mask = data_mask & (data_track1.pfp.trackScore > trackscore_th) & (data_track2.pfp.trackScore > trackscore_th)
data_evt_df_2prong = data_evt_df_precut[data_mask]
intime_mask = intime_mask & (intime_track1.pfp.trackScore > trackscore_th) & (intime_track2.pfp.trackScore > trackscore_th)
intime_evt_df_2prong = intime_evt_df_precut[intime_mask]

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_2prong
df_dict_data[stage_key] = data_evt_df_2prong
df_dict_intime[stage_key] = intime_evt_df_2prong
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]


# both track start points within dist_th (currently 1.2, same units as the
# position columns — presumably cm) of the slice vertex
stage_key = "2prong-start_vertex_dist"
dist_th = 1.2
mc_mask = mc_mask & (mag3d(mc_track1.pfp.trk.start, mc_evt_df_precut.slc.vertex) < dist_th) & (mag3d(mc_track2.pfp.trk.start, mc_evt_df_precut.slc.vertex) < dist_th)
mc_evt_df_2prong = mc_evt_df_precut[mc_mask]
data_mask = data_mask & (mag3d(data_track1.pfp.trk.start, data_evt_df_precut.slc.vertex) < dist_th) & (mag3d(data_track2.pfp.trk.start, data_evt_df_precut.slc.vertex) < dist_th)
data_evt_df_2prong = data_evt_df_precut[data_mask]
intime_mask = intime_mask & (mag3d(intime_track1.pfp.trk.start, intime_evt_df_precut.slc.vertex) < dist_th) & (mag3d(intime_track2.pfp.trk.start, intime_evt_df_precut.slc.vertex) < dist_th)
intime_evt_df_2prong = intime_evt_df_precut[intime_mask]

bar_type = "topology"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_topo = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong,
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

bar_type = "genie"
save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
ret_genie = bar_plot(type=bar_type, evtdf=mc_evt_df_2prong, 
                        plot_labels=plot_labels_bar,
                        save_fig=save_fig, save_name=save_name)

df_dict[stage_key] = mc_evt_df_2prong
df_dict_data[stage_key] = data_evt_df_2prong
df_dict_intime[stage_key] = intime_evt_df_2prong
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# longer track > 50 cm (disabled for now)
# mask = mask & (mc_track1.pfp.trk.len > 50)
# mc_evt_df_2prong = mc_evt_df_precut[mask]
# bar_plot(type=bar_type, evtdf=mc_evt_df_2prong)

# tracks that belong to the slices that passed the above cuts
# mc_track dfs have the same index levels as slc dfs
mc_track1 = mc_track1.loc[mc_evt_df_2prong.index]
mc_track2 = mc_track2.loc[mc_evt_df_2prong.index]
data_track1 = data_track1.loc[data_evt_df_2prong.index]
data_track2 = data_track2.loc[data_evt_df_2prong.index]
intime_track1 = intime_track1.loc[intime_evt_df_2prong.index]
intime_track2 = intime_track2.loc[intime_evt_df_2prong.index]


## Data vs. MC Comparison

In [None]:
# Quick look at the per-slice PFP multiplicity in MC (unweighted, bins 0..9).
# NOTE(review): var_data is assigned but not drawn in this cell.
var_mc = mc_npfps
var_data = data_npfps
plt.hist(var_mc, bins=np.arange(0, 10, 1))
plt.show()

## Efficiency

# PID
- cut on track length & chi2 scores
- (MCS fwd P - MCS bwd P)
- quality cut on (range P - MCS P)
- TODO: number of daughter particles?

In [None]:
# compbine longer & shorter tracks into one df
mc_trk_df_2prong = pd.concat([mc_track1.reset_index().set_index(mc_trk_df.index.names),
                              mc_track2.reset_index().set_index(mc_trk_df.index.names)])
data_trk_df_2prong = pd.concat([data_track1.reset_index().set_index(data_trk_df.index.names),
                              data_track2.reset_index().set_index(data_trk_df.index.names)])
intime_trk_df_2prong = pd.concat([intime_track1.reset_index().set_index(intime_trk_df.index.names),
                              intime_track2.reset_index().set_index(intime_trk_df.index.names)])


## Selection

In [None]:
def avg_chi2(df, var_name):
    """Average one chi2 PID score over the three planes I0, I1 and I2.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns can be walked as
        ``df['pfp']['trk']['chi2pid'][plane][var_name]``.
    var_name : str
        Name of the score column to average (e.g. ``"chi2_muon"``).

    Returns
    -------
    pandas.Series
        Row-wise mean of the per-plane scores. Values equal to 0 are treated
        as missing and do not enter the mean; a row with no non-zero score
        yields NaN.
    """
    per_plane = [
        df['pfp']['trk']['chi2pid'][plane][var_name]
        for plane in ('I0', 'I1', 'I2')
    ]
    scores = pd.concat(per_plane, axis=1)
    # a score of exactly 0 is turned into NaN so the mean skips it
    return scores.replace(0, np.nan).mean(axis=1, skipna=True)

In [None]:
def _plot_chi2_per_plane(tracks, score, bins, xlabel, title,
                         ylabel="Tracks", save_name=None):
    """Overlay the 3-plane average and the per-plane values of one chi2 score.

    tracks    : track dataframe to histogram
    score     : chi2 column name, e.g. "chi2_muon" or "chi2_proton"
    bins      : histogram bin edges
    xlabel, title, ylabel : axis labels and title of the figure
    save_name : output path; the figure is written only when this is given
                and the notebook-level ``save_fig`` flag is set
    """
    plt.hist(avg_chi2(tracks, score), bins=bins, histtype="step",
             label="3-plane Average")
    for i in range(3):
        plane_score = tracks.pfp.trk.chi2pid["I{}".format(i)][score]
        plt.hist(plane_score, bins=bins, histtype="step",
                 label="Plane {}".format(i))
    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    if save_name is not None and save_fig:
        plt.savefig(save_name)
    plt.show()


# truth-matched track samples, selected once and reused for all four figures
true_mu_trks = mc_trk_df_2prong[mc_trk_df_2prong.pfp.trk.truth.p.pdg == 13]
true_p_trks = mc_trk_df_2prong[mc_trk_df_2prong.pfp.trk.truth.p.pdg == 2212]

# muon-hypothesis score
bins = np.linspace(0, 30, 101)
save_name = save_fig_dir + "/muon_chi2_muon_avg.png"
_plot_chi2_per_plane(true_mu_trks, "chi2_muon", bins,
                     xlabel="Muon-like Score",
                     title="True Muon Tracks in 2-Prong Slices",
                     ylabel="Tracks (POT=1.0e20)",
                     save_name=save_name)
_plot_chi2_per_plane(true_p_trks, "chi2_muon", bins,
                     xlabel="Muon-like Score",
                     title="True Proton Tracks in 2-Prong Slices")

# proton-hypothesis score (wider range)
bins = np.linspace(0, 300, 101)
_plot_chi2_per_plane(true_mu_trks, "chi2_proton", bins,
                     xlabel="Proton-like Score",
                     title="True Muon Tracks in 2-Prong Slices")
_plot_chi2_per_plane(true_p_trks, "chi2_proton", bins,
                     xlabel="Proton-like Score",
                     title="True Proton Tracks in 2-Prong Slices")

In [None]:
# ---- find muon candidate from all tracks
# PID scores used by the muon cuts: MC and data use the average over the
# three planes computed by avg_chi2.
mc_chimu_avg, mc_chip_avg = (
    avg_chi2(mc_trk_df_2prong, score) for score in ("chi2_muon", "chi2_proton")
)
data_chimu_avg, data_chip_avg = (
    avg_chi2(data_trk_df_2prong, score) for score in ("chi2_muon", "chi2_proton")
)

# The intime sample reads plane I2 only, even though the variables are named
# *_avg; the plane-averaged version is kept below for reference.
# TODO: average score over planes
# intime_chimu_avg = avg_chi2(intime_trk_df_2prong, "chi2_muon")
# intime_chip_avg = avg_chi2(intime_trk_df_2prong, "chi2_proton")
intime_chimu_avg = intime_trk_df_2prong.pfp.trk.chi2pid["I2"]["chi2_muon"]
intime_chip_avg = intime_trk_df_2prong.pfp.trk.chi2pid["I2"]["chi2_proton"]

# chi2mu_scan = np.arange(5, 40, 1)
# chi2p_scan = np.arange(5, 150, 5)
# chi2_scan_grid = np.zeros((len(chi2mu_scan), len(chi2p_scan)))
# ax[0].set_ylabel(plot_labels[1])
# ax[0].plot(bins, bins, color="red", linestyle="--", alpha=0.5)
# ax[0].set_title(plot_labels[2])
# plt.colorbar(im[3], ax=ax[0], label="Tracks")

# mean_list = []
# sig_list = []
# for i in range(len(bins)-1):
#     for j, mu_chi2p_th in enumerate(chi2p_scan):
#         # mu_cut = (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon > 0) & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon < mu_chi2mu_th) & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_proton > mu_chi2p_th)
#         mu_cut = (chimu_avg > 0) & (chimu_avg < mu_chi2mu_th) & (chip_avg > mu_chi2p_th)
#         mu_cut = mu_cut & (mc_trk_df_2prong.pfp.trk.len > 50)
#         mu_candidates = mc_trk_df_2prong[mu_cut]
#         mu_candidates_prequal = mu_candidates

#         # more muon-like than pion-like
#         # mu_cut = mu_cut & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon < mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_pion * 0.9)

#         # muon quality cut
#         mu_cut = mu_cut & (np.abs((mc_trk_df_2prong.pfp.trk.rangeP.p_muon - mc_trk_df_2prong.pfp.trk.mcsP.fwdP_muon) / mc_trk_df_2prong.pfp.trk.rangeP.p_muon) < 0.5)
#         mu_candidates = mc_trk_df_2prong[mu_cut]

#         # if multiple mu candidates in a slice, choose the longer one
#         mu_candidates = mu_candidates.sort_values(by=('pfp', 'trk', 'len'), ascending=False)
#         mu_candidate = mu_candidates.groupby(level=[0,1,2]).nth(0)

#         mu_eff = len(mu_candidates[mu_candidates.pfp.trk.truth.p.pdg == 13])/len(mc_trk_df_2prong[mc_trk_df_2prong.pfp.trk.truth.p.pdg == 13])
#         mu_purity = len(mu_candidate[mu_candidate.pfp.trk.truth.p.pdg == 13])/len(mu_candidate)

#         if mu_purity > best_purity:
#             best_purity = mu_purity
#             best_purity_chi2mu = mu_chi2mu_th
#             best_purity_chi2p = mu_chi2p_th
#         if mu_eff > best_eff:
#             best_eff = mu_eff
#             best_eff_chi2mu = mu_chi2mu_th
#             best_eff_chi2p = mu_chi2p_th

#         chi2_scan_grid[i, j] = mu_purity
#         chi2_scan_grid_eff[i, j] = mu_eff
#         # norm_mu_purity = (mu_purity - np.nanmin(chi2_scan_grid)) / (np.nanmax(chi2_scan_grid) - np.nanmin(chi2_scan_grid))
#         # norm_mu_eff = (mu_eff - np.nanmin(chi2_scan_grid_eff)) / (np.nanmax(chi2_scan_grid_eff) - np.nanmin(chi2_scan_grid_eff))
#         # color_purity = plt.cm.viridis(norm_mu_purity)
#         # color_eff = plt.cm.viridis(norm_mu_eff)
#         # ax_p.plot(mu_chi2mu_th, mu_chi2p_th, "o", color=color_purity, markersize=5)
#         # ax_e.plot(mu_chi2mu_th, mu_chi2p_th, "o", color=color_eff, markersize=5)
#         # print("chimu: {}, chip: {}, muon purity: {}".format(mu_chi2mu_th, mu_chi2p_th, mu_purity))
# print("best purity: {} at mu_chi2mu: {}, mu_chi2p: {}".format(best_purity, best_purity_chi2mu, best_purity_chi2p))
# print("best eff: {} at mu_chi2mu: {}, mu_chi2p: {}".format(best_eff, best_eff_chi2mu, best_eff_chi2p))

# # find where this is
# max_purity = chi2_scan_grid[chi2_scan_grid_eff > 0.75].max()
# where_max = np.where((chi2_scan_grid == max_purity) & (chi2_scan_grid_eff > 0.75))
# print("Maximum purity where efficiency > 0.75 is:", max_purity)
# print("Indices (i, j) where this occurs:", list(zip(where_max[0], where_max[1])))
# print("mu_chi2mu_th", chi2mu_scan[where_max[0]])
# print("mu_chi2p_th", chi2p_scan[where_max[1]])

# --- optimized cut
mu_chi2mu_th = 35   # upper bound on the muon-hypothesis chi2 score
mu_chi2p_th = 100   # lower bound on the proton-hypothesis chi2 score
mu_len_th = 50      # lower bound on the track length (pfp.trk.len)


def _select_mu_candidates(trk_df, chimu, chip,
                          chi2mu_max, chi2p_min, len_min, qual_max=0.5):
    """Apply the muon-candidate cuts to one sample.

    trk_df     : track-level dataframe of the sample
    chimu/chip : muon / proton chi2 scores, indexed like ``trk_df``
    chi2mu_max : keep tracks with 0 < chimu < chi2mu_max
    chi2p_min  : keep tracks with chip > chi2p_min
    len_min    : keep tracks with pfp.trk.len > len_min
    qual_max   : keep tracks with |range P - MCS fwd P| / range P < qual_max

    Returns ``(cut, prequal, candidates, candidate)``:
      cut        - boolean mask after PID, length and quality cuts
      prequal    - tracks passing PID + length cuts only (before quality cut)
      candidates - tracks passing all cuts, sorted by decreasing length
      candidate  - first row of ``candidates`` per group of index levels
                   0-2, i.e. the longest passing track of each group
    """
    # mu_cut = (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon > 0) & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon < mu_chi2mu_th) & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_proton > mu_chi2p_th)
    cut = (chimu > 0) & (chimu < chi2mu_max) & (chip > chi2p_min)
    cut = cut & (trk_df.pfp.trk.len > len_min)
    prequal = trk_df[cut]

    # more muon-like than pion-like
    # mu_cut = mu_cut & (mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_muon < mc_trk_df_2prong.pfp.trk.chi2pid.I2.chi2_pion * 0.9)

    # muon quality cut
    range_p = trk_df.pfp.trk.rangeP.p_muon
    mcs_p = trk_df.pfp.trk.mcsP.fwdP_muon
    cut = cut & (np.abs((range_p - mcs_p) / range_p) < qual_max)

    # if multiple mu candidates in a slice, choose the longer one
    candidates = trk_df[cut].sort_values(by=('pfp', 'trk', 'len'), ascending=False)
    candidate = candidates.groupby(level=[0, 1, 2]).nth(0)
    return cut, prequal, candidates, candidate


(mc_mu_cut, mc_mu_candidates_prequal,
 mc_mu_candidates, mc_mu_candidate) = _select_mu_candidates(
    mc_trk_df_2prong, mc_chimu_avg, mc_chip_avg,
    mu_chi2mu_th, mu_chi2p_th, mu_len_th)
(data_mu_cut, data_mu_candidates_prequal,
 data_mu_candidates, data_mu_candidate) = _select_mu_candidates(
    data_trk_df_2prong, data_chimu_avg, data_chip_avg,
    mu_chi2mu_th, mu_chi2p_th, mu_len_th)
(intime_mu_cut, intime_mu_candidates_prequal,
 intime_mu_candidates, intime_mu_candidate) = _select_mu_candidates(
    intime_trk_df_2prong, intime_chimu_avg, intime_chip_avg,
    mu_chi2mu_th, mu_chi2p_th, mu_len_th)

# Efficiency: true muons among all passing tracks / true muons among all tracks.
# Purity: true muons among the one-per-group chosen candidates.
mc_mu_eff = len(mc_mu_candidates[mc_mu_candidates.pfp.trk.truth.p.pdg == 13])/len(mc_trk_df_2prong[mc_trk_df_2prong.pfp.trk.truth.p.pdg == 13])
mc_mu_purity = len(mc_mu_candidate[mc_mu_candidate.pfp.trk.truth.p.pdg == 13])/len(mc_mu_candidate)
print("chimu: {}, chip: {}, muon efficiency: {}, purity: {}".format(mu_chi2mu_th, mu_chi2p_th, mc_mu_eff, mc_mu_purity))

In [None]:
# Tracks that were not picked as the muon candidate: index entries of the
# 2-prong track frame that are absent from the chosen-candidate frame.
mc_trk_idx = mc_trk_df_2prong.index.difference(mc_mu_candidate.index)
data_trk_idx = data_trk_df_2prong.index.difference(data_mu_candidate.index)
intime_trk_idx = intime_trk_df_2prong.index.difference(intime_mu_candidate.index)

mc_not_mu_candidate = mc_trk_df_2prong.loc[mc_trk_idx]
data_not_mu_candidate = data_trk_df_2prong.loc[data_trk_idx]
intime_not_mu_candidate = intime_trk_df_2prong.loc[intime_trk_idx]

# PID scores of the leftover tracks. This rebinds the *_chimu_avg / *_chip_avg
# names, which from here on describe the leftover tracks only.
mc_chimu_avg, mc_chip_avg = (
    avg_chi2(mc_not_mu_candidate, score) for score in ("chi2_muon", "chi2_proton")
)
data_chimu_avg, data_chip_avg = (
    avg_chi2(data_not_mu_candidate, score) for score in ("chi2_muon", "chi2_proton")
)
# intime sample: plane I2 only (plane-averaged version kept for reference)
# intime_chimu_avg = avg_chi2(intime_not_mu_candidate, "chi2_muon")
# intime_chip_avg = avg_chi2(intime_not_mu_candidate, "chi2_proton")
intime_chimu_avg = intime_not_mu_candidate.pfp.trk.chi2pid["I2"]["chi2_muon"]
intime_chip_avg = intime_not_mu_candidate.pfp.trk.chi2pid["I2"]["chi2_proton"]

# -- scan over chi2_muon and chi2_proton
# chi2mu_scan = np.arange(20, 30, 1)
# chi2p_scan = np.arange(60, 120, 1)
# chi2_scan_grid = np.zeros((len(chi2mu_scan), len(chi2p_scan)))
# chi2_scan_grid_eff = np.zeros((len(chi2mu_scan), len(chi2p_scan)))

# best_purity = 0
# best_purity_chi2mu = 0
# best_purity_chi2p = 0
# best_eff = 0
# best_eff_chi2mu = 0
# best_eff_chi2p = 0

# # for i, p_chi2mu_th in enumerate(chi2mu_scan):
# i = 0
# for j, p_chi2p_th in enumerate(chi2p_scan):
#     # p_cut = (not_mu_candidate.pfp.trk.chi2pid.I2.chi2_muon > p_chi2mu_th) & (not_mu_candidate.pfp.trk.chi2pid.I2.chi2_proton > 0) & (not_mu_candidate.pfp.trk.chi2pid.I2.chi2_proton < p_chi2p_th)
#     p_cut = (chip_avg > 0) & (chip_avg < p_chi2p_th)
#     p_candidates = not_mu_candidate[p_cut]
#     p_eff = len(p_candidates[p_candidates.pfp.trk.truth.p.pdg == 2212])/len(not_mu_candidate[not_mu_candidate.pfp.trk.truth.p.pdg == 2212])
#     p_purity = len(p_candidates[p_candidates.pfp.trk.truth.p.pdg == 2212])/len(p_candidates)

#     if p_purity > best_purity:
#         best_purity = p_purity
#         # best_purity_chi2mu = p_chi2mu_th
#         best_purity_chi2p = p_chi2p_th
#     if p_eff > best_eff:
#         best_eff = p_eff
#         # best_eff_chi2mu = p_chi2mu_th
#         best_eff_chi2p = p_chi2p_th

#     chi2_scan_grid[i, j] = p_purity
#     chi2_scan_grid_eff[i, j] = p_eff

# print("best purity: {} at p_chi2mu: {}, p_chi2p: {}".format(best_purity, best_purity_chi2mu, best_purity_chi2p))
# print("best eff: {} at p_chi2mu: {}, p_chi2p: {}".format(best_eff, best_eff_chi2mu, best_eff_chi2p))

# # find where this is
# max_purity = chi2_scan_grid[chi2_scan_grid_eff > 0.85].max()
# where_max = np.where((chi2_scan_grid == max_purity) & (chi2_scan_grid_eff > 0.85))
# print("Maximum purity where efficiency > 0.85 is:", max_purity)
# print("Indices (i, j) where this occurs:", list(zip(where_max[0], where_max[1])))
# print("p_chi2mu_th", chi2mu_scan[where_max[0]])
# print("p_chi2p_th", chi2p_scan[where_max[1]])


# ---- in leftovers, find proton candidates
# A leftover track is a proton candidate when its proton score is strictly
# between 0 and p_chi2p_th.
p_chi2p_th = 90

# p_cut = (not_mu_candidate.pfp.trk.chi2pid.I2.chi2_proton > 0) & (not_mu_candidate.pfp.trk.chi2pid.I2.chi2_proton < p_chi2p_th)
mc_p_cut = (mc_chip_avg < p_chi2p_th) & (mc_chip_avg > 0)
data_p_cut = (data_chip_avg < p_chi2p_th) & (data_chip_avg > 0)
intime_p_cut = (intime_chip_avg < p_chi2p_th) & (intime_chip_avg > 0)

mc_p_candidates = mc_not_mu_candidate[mc_p_cut]
data_p_candidates = data_not_mu_candidate[data_p_cut]
intime_p_candidates = intime_not_mu_candidate[intime_p_cut]

# MC truth bookkeeping: true protons (pdg 2212) among the selected candidates
# and among all leftover tracks.
n_sel_true_p = len(mc_p_candidates[mc_p_candidates.pfp.trk.truth.p.pdg == 2212])
n_all_true_p = len(mc_not_mu_candidate[mc_not_mu_candidate.pfp.trk.truth.p.pdg == 2212])
print("proton efficiency", n_sel_true_p/n_all_true_p)
print("proton purity", n_sel_true_p/len(mc_p_candidates))

In [None]:
# ---- limit to the target kinematic range
# Candidates are kept when their range-based momentum lies strictly inside
# (low, high) for the corresponding particle hypothesis.
MASS_MUON = 0.1056583745
mu_low_th = 0.22
mu_hi_th = 1

mc_mu_kin_cut = (
    (mc_mu_candidate.pfp.trk.rangeP.p_muon > mu_low_th)
    & (mc_mu_candidate.pfp.trk.rangeP.p_muon < mu_hi_th)
)
mc_mu_candidate = mc_mu_candidate[mc_mu_kin_cut]

data_mu_kin_cut = (
    (data_mu_candidate.pfp.trk.rangeP.p_muon > mu_low_th)
    & (data_mu_candidate.pfp.trk.rangeP.p_muon < mu_hi_th)
)
data_mu_candidate = data_mu_candidate[data_mu_kin_cut]

intime_mu_kin_cut = (
    (intime_mu_candidate.pfp.trk.rangeP.p_muon > mu_low_th)
    & (intime_mu_candidate.pfp.trk.rangeP.p_muon < mu_hi_th)
)
intime_mu_candidate = intime_mu_candidate[intime_mu_kin_cut]

MASS_PROTON = 0.938272081
p_low_th = 0.3
p_hi_th = 1

mc_p_kin_cut = (
    (mc_p_candidates.pfp.trk.rangeP.p_proton > p_low_th)
    & (mc_p_candidates.pfp.trk.rangeP.p_proton < p_hi_th)
)
mc_p_candidates = mc_p_candidates[mc_p_kin_cut]

data_p_kin_cut = (
    (data_p_candidates.pfp.trk.rangeP.p_proton > p_low_th)
    & (data_p_candidates.pfp.trk.rangeP.p_proton < p_hi_th)
)
data_p_candidates = data_p_candidates[data_p_kin_cut]

intime_p_kin_cut = (
    (intime_p_candidates.pfp.trk.rangeP.p_proton > p_low_th)
    & (intime_p_candidates.pfp.trk.rangeP.p_proton < p_hi_th)
)
intime_p_candidates = intime_p_candidates[intime_p_kin_cut]

# select mu-X slice
# Event rows whose index matches a muon candidate once the candidate's last
# (4th) index level has been dropped.
stage_key = "2prong-muX"
mc_mu_idx = mc_mu_candidate.index.droplevel(3).unique()
data_mu_idx = data_mu_candidate.index.droplevel(3).unique()
intime_mu_idx = intime_mu_candidate.index.droplevel(3).unique()

mc_evt_df_muX = mc_evt_df_2prong.loc[mc_mu_idx]
data_evt_df_muX = data_evt_df_2prong.loc[data_mu_idx]
intime_evt_df_muX = intime_evt_df_2prong.loc[intime_mu_idx]

# one bar plot per breakdown type, MC only
bar_results = {}
for bar_type in ("topology", "genie"):
    save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
    bar_results[bar_type] = bar_plot(type=bar_type, evtdf=mc_evt_df_muX,
                                     plot_labels=plot_labels_bar,
                                     save_fig=save_fig, save_name=save_name)
ret_topo = bar_results["topology"]
ret_genie = bar_results["genie"]

# book-keeping for this selection stage
df_dict[stage_key] = mc_evt_df_muX
df_dict_data[stage_key] = data_evt_df_muX
df_dict_intime[stage_key] = intime_evt_df_muX
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# select mu-p slice
# Keep the event rows that have both a muon candidate and a proton candidate.
stage_key = "2prong-mup"
mc_p_idx = mc_p_candidates.index.droplevel(3).unique()
data_p_idx = data_p_candidates.index.droplevel(3).unique()
intime_p_idx = intime_p_candidates.index.droplevel(3).unique()

mc_mu_p_idx = mc_mu_idx.intersection(mc_p_idx)
data_mu_p_idx = data_mu_idx.intersection(data_p_idx)
intime_mu_p_idx = intime_mu_idx.intersection(intime_p_idx)

# number of entries in each index, sample by sample
for idx_label, idx in (
    ("mc_mu_idx", mc_mu_idx),
    ("mc_p_idx", mc_p_idx),
    ("mc_mu_p_idx", mc_mu_p_idx),
    ("data_mu_idx", data_mu_idx),
    ("data_p_idx", data_p_idx),
    ("data_mu_p_idx", data_mu_p_idx),
    ("intime_mu_idx", intime_mu_idx),
    ("intime_p_idx", intime_p_idx),
    ("intime_mu_p_idx", intime_mu_p_idx),
):
    print(idx_label, len(idx))

mc_evt_df_mup = mc_evt_df_2prong.loc[mc_mu_p_idx]
data_evt_df_mup = data_evt_df_2prong.loc[data_mu_p_idx]
intime_evt_df_mup = intime_evt_df_2prong.loc[intime_mu_p_idx]

# one bar plot per breakdown type, MC only
bar_results = {}
for bar_type in ("topology", "genie"):
    save_name = save_fig_dir + "/bar_plot-{}-{}.png".format(bar_type, stage_key)
    bar_results[bar_type] = bar_plot(type=bar_type, evtdf=mc_evt_df_mup,
                                     plot_labels=plot_labels_bar,
                                     save_fig=save_fig, save_name=save_name)
ret_topo = bar_results["topology"]
ret_genie = bar_results["genie"]

# book-keeping for this selection stage
df_dict[stage_key] = mc_evt_df_mup
df_dict_data[stage_key] = data_evt_df_mup
df_dict_intime[stage_key] = intime_evt_df_mup
perc_dict_topo[stage_key] = ret_topo["perc_list"]
perc_dict_genie[stage_key] = ret_genie["perc_list"]

# match tracks to slices
# Pair the muon candidate and the proton candidates of each sample with that
# sample's mu-p event frame via match_trkdf_to_slcdf.
mc_mu_df, mc_p_df = (
    match_trkdf_to_slcdf(trks, mc_evt_df_mup)
    for trks in (mc_mu_candidate, mc_p_candidates)
)
data_mu_df, data_p_df = (
    match_trkdf_to_slcdf(trks, data_evt_df_mup)
    for trks in (data_mu_candidate, data_p_candidates)
)
intime_mu_df, intime_p_df = (
    match_trkdf_to_slcdf(trks, intime_evt_df_mup)
    for trks in (intime_mu_candidate, intime_p_candidates)
)


## Data vs. MC Comparison

In [None]:
# TODO: are there any cases where the proton is longer than the muon?
# Fractions of 2-prong slices by the true PDG codes of the longer (track1)
# and shorter (track2) track. Every fraction is normalised by the length of
# its own mask.
pdg_longer = mc_track1.pfp.trk.truth.p.pdg
pdg_shorter = mc_track2.pfp.trk.truth.p.pdg

longer_muon_1mu1p = (pdg_longer == 13) & (pdg_shorter == 2212)
longer_proton_1mu1p = (pdg_longer == 2212) & (pdg_shorter == 13)

print("1mu1p, longer muon", longer_muon_1mu1p.sum()/len(longer_muon_1mu1p))
print("1mu1p, longer proton", longer_proton_1mu1p.sum()/len(longer_proton_1mu1p))

longer_muon_1mu1mu = (pdg_longer == 13) & (pdg_shorter == 13)
print("1mu1mu, longer muon", longer_muon_1mu1mu.sum()/len(longer_muon_1mu1mu))

piplus_proton = (pdg_longer == 211) & (pdg_shorter == 2212)
piminus_proton = (pdg_longer == -211) & (pdg_shorter == 2212)
print("piplus_proton", piplus_proton.sum()/len(piplus_proton))
print("piminus_proton", piminus_proton.sum()/len(piminus_proton))

mu_piplus = (pdg_longer == 13) & (pdg_shorter == 211)
mu_piminus = (pdg_longer == 13) & (pdg_shorter == -211)
print("mu_piplus", mu_piplus.sum()/len(mu_piplus))
print("mu_piminus", mu_piminus.sum()/len(mu_piminus))

# full truth PDG breakdown of each track
print("-----------------")
print("longer track")
print(pdg_longer.value_counts())
print("shorter track")
print(pdg_shorter.value_counts())

In [None]:
# track length comparison for different true pdgs
# Three area-normalised figures: all tracks, the longer track, the shorter track.
var_name = ("pfp", "trk", "len", "", "", "")
bins = np.linspace(0, 300, 81)
pdg_labels = ["Muon", "Pion", "Proton"]

# -- both tracks
true_pdg = mc_trk_df_2prong.pfp.trk.truth.p.pdg
mc_bothtracks_mu = mc_trk_df_2prong[true_pdg == 13][var_name]
mc_bothtracks_pi = mc_trk_df_2prong[np.abs(true_pdg) == 211][var_name]
mc_bothtracks_p = mc_trk_df_2prong[true_pdg == 2212][var_name]
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# flip the legend order
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1])

plt.xlabel("Track Length [cm]")
plt.title("Tracks in 2-prong Slices")
save_name = save_fig_dir + "/track_length_2prong.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- longer track
true_pdg = mc_track1.pfp.trk.truth.p.pdg
mc_track1_mu = mc_track1[true_pdg == 13][var_name]
mc_track1_pi = mc_track1[np.abs(true_pdg) == 211][var_name]
mc_track1_p = mc_track1[true_pdg == 2212][var_name]
var = [mc_track1_mu, mc_track1_pi, mc_track1_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Track Length [cm]")
plt.title("Longer Track in 2-prong Slices")
save_name = save_fig_dir + "/track_length_2prong_longer.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- shorter track (narrower range)
bins = np.linspace(0, 150, 81)
true_pdg = mc_track2.pfp.trk.truth.p.pdg
mc_track2_mu = mc_track2[true_pdg == 13][var_name]
mc_track2_pi = mc_track2[np.abs(true_pdg) == 211][var_name]
mc_track2_p = mc_track2[true_pdg == 2212][var_name]
var = [mc_track2_mu, mc_track2_pi, mc_track2_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# legend and x label were missing on this figure only
plt.legend()
plt.xlabel("Track Length [cm]")
plt.title("Shorter Track in 2-prong Slices")
save_name = save_fig_dir + "/track_length_2prong_shorter.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# mu score comparison for different true pdgs
# Three area-normalised figures: all tracks, the longer track, the shorter track.
# var_name = ("pfp", "trk", "chi2pid", "I2", "chi2_muon", "")
bins = np.linspace(0, 80, 81)

# -- both tracks
chi2mu_avg = avg_chi2(mc_trk_df_2prong, "chi2_muon")
true_pdg = mc_trk_df_2prong.pfp.trk.truth.p.pdg
mc_bothtracks_mu = chi2mu_avg[true_pdg == 13]
mc_bothtracks_pi = chi2mu_avg[np.abs(true_pdg) == 211]
mc_bothtracks_p = chi2mu_avg[true_pdg == 2212]
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Muon-like Score (3-plane Average)")
plt.title("Tracks in 2-prong Slices")
save_name = save_fig_dir + "/chi2mu_2prong.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- longer track
chi2mu_avg = avg_chi2(mc_track1, "chi2_muon")
true_pdg = mc_track1.pfp.trk.truth.p.pdg
mc_track1_mu = chi2mu_avg[true_pdg == 13]
mc_track1_pi = chi2mu_avg[np.abs(true_pdg) == 211]
mc_track1_p = chi2mu_avg[true_pdg == 2212]
var = [mc_track1_mu, mc_track1_pi, mc_track1_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Muon-like Score (3-plane Average)")
plt.title("Longer Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2mu_2prong_longer.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- shorter track
chi2mu_avg = avg_chi2(mc_track2, "chi2_muon")
true_pdg = mc_track2.pfp.trk.truth.p.pdg
mc_track2_mu = chi2mu_avg[true_pdg == 13]
mc_track2_pi = chi2mu_avg[np.abs(true_pdg) == 211]
mc_track2_p = chi2mu_avg[true_pdg == 2212]
var = [mc_track2_mu, mc_track2_pi, mc_track2_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# legend and x label were missing on this figure only
plt.legend()
plt.xlabel("Muon-like Score (3-plane Average)")
plt.title("Shorter Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2mu_2prong_shorter.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# pi score comparison for different true pdgs
# Three area-normalised figures: all tracks, the longer track, the shorter track.
# var_name = ("pfp", "trk", "chi2pid", "I2", "chi2_pion", "")
bins = np.linspace(0, 80, 81)

# -- both tracks
chi2pi_avg = avg_chi2(mc_trk_df_2prong, "chi2_pion")
true_pdg = mc_trk_df_2prong.pfp.trk.truth.p.pdg
mc_bothtracks_mu = chi2pi_avg[true_pdg == 13]
mc_bothtracks_pi = chi2pi_avg[np.abs(true_pdg) == 211]
mc_bothtracks_p = chi2pi_avg[true_pdg == 2212]
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Pion-like Score (3-plane Average)")
plt.title("Tracks in 2-prong Slices")
save_name = save_fig_dir + "/chi2pi_2prong.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- longer track
chi2pi_avg = avg_chi2(mc_track1, "chi2_pion")
true_pdg = mc_track1.pfp.trk.truth.p.pdg
mc_track1_mu = chi2pi_avg[true_pdg == 13]
mc_track1_pi = chi2pi_avg[np.abs(true_pdg) == 211]
mc_track1_p = chi2pi_avg[true_pdg == 2212]
var = [mc_track1_mu, mc_track1_pi, mc_track1_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Pion-like Score (3-plane Average)")
plt.title("Longer Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2pi_2prong_longer.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- shorter track
chi2pi_avg = avg_chi2(mc_track2, "chi2_pion")
true_pdg = mc_track2.pfp.trk.truth.p.pdg
mc_track2_mu = chi2pi_avg[true_pdg == 13]
mc_track2_pi = chi2pi_avg[np.abs(true_pdg) == 211]
mc_track2_p = chi2pi_avg[true_pdg == 2212]
var = [mc_track2_mu, mc_track2_pi, mc_track2_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# legend was missing on this figure only
plt.legend()
plt.title("Shorter Track in 2-prong Slices")
plt.xlabel("Pion-like Score (3-plane Average)")
save_name = save_fig_dir + "/chi2pi_2prong_shorter.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# mu / pi score ratio comparison for different true pdgs
# Three area-normalised figures: all tracks, the longer track, the shorter track.
# var_name_1 = ("pfp", "trk", "chi2pid", "I2", "chi2_muon", "")
# var_name_2 = ("pfp", "trk", "chi2pid", "I2", "chi2_pion", "")
bins = np.linspace(0, 2, 81)

# -- both tracks
chi2mu_avg = avg_chi2(mc_trk_df_2prong, "chi2_muon")
chi2pi_avg = avg_chi2(mc_trk_df_2prong, "chi2_pion")
chi2ratio_avg = chi2mu_avg / chi2pi_avg
true_pdg = mc_trk_df_2prong.pfp.trk.truth.p.pdg
mc_bothtracks_mu = chi2ratio_avg[true_pdg == 13]
mc_bothtracks_pi = chi2ratio_avg[np.abs(true_pdg) == 211]
mc_bothtracks_p = chi2ratio_avg[true_pdg == 2212]
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Muon-like Score / Pion-like Score (3-plane Average)")
plt.title("Tracks in 2-prong Slices")
save_name = save_fig_dir + "/chi2ratio_2prong.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- longer track
chi2mu_avg = avg_chi2(mc_track1, "chi2_muon")
chi2pi_avg = avg_chi2(mc_track1, "chi2_pion")
chi2ratio_avg = chi2mu_avg / chi2pi_avg
true_pdg = mc_track1.pfp.trk.truth.p.pdg
mc_track1_mu = chi2ratio_avg[true_pdg == 13]
mc_track1_pi = chi2ratio_avg[np.abs(true_pdg) == 211]
mc_track1_p = chi2ratio_avg[true_pdg == 2212]
var = [mc_track1_mu, mc_track1_pi, mc_track1_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Muon-like Score / Pion-like Score (3-plane Average)")
plt.title("Longer Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2ratio_2prong_longer.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- shorter track
chi2mu_avg = avg_chi2(mc_track2, "chi2_muon")
chi2pi_avg = avg_chi2(mc_track2, "chi2_pion")
chi2ratio_avg = chi2mu_avg / chi2pi_avg
true_pdg = mc_track2.pfp.trk.truth.p.pdg
mc_track2_mu = chi2ratio_avg[true_pdg == 13]
mc_track2_pi = chi2ratio_avg[np.abs(true_pdg) == 211]
mc_track2_p = chi2ratio_avg[true_pdg == 2212]
var = [mc_track2_mu, mc_track2_pi, mc_track2_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# legend and x label were missing on this figure only
plt.legend()
plt.xlabel("Muon-like Score / Pion-like Score (3-plane Average)")
plt.title("Shorter Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2ratio_2prong_shorter.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# proton score comparison for different true pdgs
# Three area-normalised figures: all tracks, the longer track, the shorter track.
# var_name = ("pfp", "trk", "chi2pid", "I2", "chi2_proton", "")
bins = np.linspace(0, 250, 81)

# -- both tracks
chi2p_avg = avg_chi2(mc_trk_df_2prong, "chi2_proton")
true_pdg = mc_trk_df_2prong.pfp.trk.truth.p.pdg
mc_bothtracks_mu = chi2p_avg[true_pdg == 13]
mc_bothtracks_pi = chi2p_avg[np.abs(true_pdg) == 211]
mc_bothtracks_p = chi2p_avg[true_pdg == 2212]
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Proton-like Score (3-plane Average)")
plt.title("Tracks in 2-prong Slices")
save_name = save_fig_dir + "/chi2p_2prong.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- longer track
chi2p_avg = avg_chi2(mc_track1, "chi2_proton")
true_pdg = mc_track1.pfp.trk.truth.p.pdg
mc_track1_mu = chi2p_avg[true_pdg == 13]
mc_track1_pi = chi2p_avg[np.abs(true_pdg) == 211]
mc_track1_p = chi2p_avg[true_pdg == 2212]
var = [mc_track1_mu, mc_track1_pi, mc_track1_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Proton-like Score (3-plane Average)")
plt.title("Longer Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2p_2prong_longer.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- shorter track
chi2p_avg = avg_chi2(mc_track2, "chi2_proton")
true_pdg = mc_track2.pfp.trk.truth.p.pdg
mc_track2_mu = chi2p_avg[true_pdg == 13]
mc_track2_pi = chi2p_avg[np.abs(true_pdg) == 211]
mc_track2_p = chi2p_avg[true_pdg == 2212]
var = [mc_track2_mu, mc_track2_pi, mc_track2_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
# legend and x label were missing on this figure only
plt.legend()
plt.xlabel("Proton-like Score (3-plane Average)")
plt.title("Shorter Track in 2-prong Slices")
save_name = save_fig_dir + "/chi2p_2prong_shorter.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
def _frac_range_minus_mcs(trks):
    """Return (rangeP.p_muon - mcsP.fwdP_muon) / rangeP.p_muon for each track."""
    range_p = trks.pfp.trk.rangeP.p_muon
    return (range_p - trks.pfp.trk.mcsP.fwdP_muon) / range_p


# Fractional range-vs-MCS momentum difference for the muon candidates taken
# before the quality cut, split into true muons and true charged pions.
bins = np.linspace(-1, 1, 41)
prequal_pdg = mc_mu_candidates_prequal.pfp.trk.truth.p.pdg
mu_candidate_mu = mc_mu_candidates_prequal[prequal_pdg == 13]
mu_candidate_pi = mc_mu_candidates_prequal[np.abs(prequal_pdg) == 211]
var = [_frac_range_minus_mcs(mu_candidate_mu),
       _frac_range_minus_mcs(mu_candidate_pi)]
plt.hist(var, bins=bins, histtype="step", density=True, label=["Muon", "Pion"])

# dashed lines at +-0.5, the window used by the quality cut
for cut_edge in (-0.5, 0.5):
    plt.axvline(cut_edge, color="red", linestyle="--")
plt.xlabel("(Range P - MCS P) / Range P")
plt.ylabel("Tracks [Area Normalized]")
plt.xlim(bins[0], bins[-1])

# flip legend order
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1])

save_name = save_fig_dir + "/mu_quality_cut_mcs.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# GENIE-mode counts for the mu-p events selected by one entry of the topology cuts.
# NOTE(review): which category index -3 of ret_topo["cuts"] refers to depends
# on the ordering inside bar_plot -- confirm before quoting these numbers.
topo_category_mask = ret_topo["cuts"][-3]
mc_evt_df_mup[topo_category_mask].genie_mode.value_counts()

In [None]:
# PID scores of the tracks left over after the muon-candidate choice, split by
# true particle type. The truth masks are shared by both figures.
nonmu_true_pdg = mc_not_mu_candidate.pfp.trk.truth.p.pdg
nonmu_is_mu = nonmu_true_pdg == 13
nonmu_is_pi = np.abs(nonmu_true_pdg) == 211
nonmu_is_p = nonmu_true_pdg == 2212

# -- muon-like score
# var_name = ("pfp", "trk", "chi2pid", "I2", "chi2_muon", "")
chi2mu_avg = avg_chi2(mc_not_mu_candidate, "chi2_muon")
bins = np.linspace(0, 80, 81)

mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p = (
    chi2mu_avg[mask] for mask in (nonmu_is_mu, nonmu_is_pi, nonmu_is_p)
)
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Muon-like Score (3-plane Average)")
plt.title("Non-muon Tracks")
save_name = save_fig_dir + "/chi2mu_nonmuon.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

# -- proton-like score
# var_name = ("pfp", "trk", "chi2pid", "I2", "chi2_proton", "")
chi2p_avg = avg_chi2(mc_not_mu_candidate, "chi2_proton")
bins = np.linspace(0, 250, 81)

mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p = (
    chi2p_avg[mask] for mask in (nonmu_is_mu, nonmu_is_pi, nonmu_is_p)
)
var = [mc_bothtracks_mu, mc_bothtracks_pi, mc_bothtracks_p]
plt.hist(var, bins=bins, histtype="step", density=True,
         label=pdg_labels)
plt.legend()
plt.xlabel("Proton-like Score (3-plane Average)")
plt.title("Non-muon Tracks")
save_name = save_fig_dir + "/chi2p_nonmuon.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

## Efficiency

In [None]:
def _plot_candidate_eff(precut_df, postcut_df, pdg, var_name, bins, xlabel,
                        cut_value, title, fig_name, header, abs_pdg=False):
    """Draw pre-/post-cut track spectra of one truth species with the efficiency overlaid.

    Parameters
    ----------
    precut_df, postcut_df : track-level frames before / after the candidate selection.
    pdg : truth PDG code selecting the species of interest.
    var_name : column key of the reconstructed variable to histogram.
    bins : array of bin edges.
    xlabel, title : axis label and figure title.
    cut_value : x position of the dashed red reference line.
    fig_name : file name (inside ``save_fig_dir``) used when ``save_fig`` is set.
    header : line printed before the integrated numbers.
    abs_pdg : match on ``|pdg|`` (both charges) instead of the signed code.
    """
    def truth_match(trk_df):
        truth_pdg = trk_df.pfp.trk.truth.p.pdg
        return (np.abs(truth_pdg) if abs_pdg else truth_pdg) == pdg

    fig, ax = plt.subplots()
    var_precut = precut_df[truth_match(precut_df)][var_name]
    var_postcut = postcut_df[truth_match(postcut_df)][var_name]
    n_precut, _, _ = ax.hist(var_precut, bins=bins, histtype="step", alpha=0.75)
    n_postcut, _, _ = ax.hist(var_postcut, bins=bins, histtype="step", alpha=0.75)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Tracks")

    ax_eff = ax.twinx()
    # Bins with no pre-cut tracks give NaN efficiency; that is expected, so the
    # divide warnings are silenced rather than shown for every empty bin.
    with np.errstate(divide="ignore", invalid="ignore"):
        eff = n_postcut / n_precut
    # All three figures draw the curve at the bin centres.
    bin_centers = (bins[:-1] + bins[1:]) / 2
    ax_eff.plot(bin_centers, eff, color="k")

    # Lower edge of the first bin whose efficiency exceeds 50%; NaN when no bin
    # does (instead of raising IndexError and aborting the cell).
    above_half = np.flatnonzero(eff > 0.5)
    eff_threshold = bins[above_half[0]] if above_half.size else np.nan
    print("threshold: ", eff_threshold)
    ax_eff.axvline(cut_value, color="red", linestyle="--")

    ax_eff.set_ylabel("Efficiency")
    ax_eff.set_ylim(0, 1.05)
    plt.xlim(bins[0], bins[-1])

    # legend using dummies
    plt.plot([], [], label="Pre-cut Tracks", alpha=0.75, color="C0")
    plt.plot([], [], label="Post-cut Tracks", alpha=0.75, color="C1")
    plt.plot([], [], label="Efficiency", color="k")
    plt.legend()
    plt.title(title)

    save_name = save_fig_dir + "/" + fig_name
    if save_fig:
        plt.savefig(save_name, dpi=300, bbox_inches="tight")
    plt.show()

    print(header)
    print("integrated efficiency: {:.2f}".format(n_postcut.sum() / n_precut.sum()))
    # Purity denominator is every selected candidate, whatever its truth match.
    print("integrated purity: {:.2f}".format(n_postcut.sum() / len(postcut_df)))


# True muons: 2-prong tracks vs. the selected muon candidate.
_plot_candidate_eff(
    mc_trk_df_2prong, mc_mu_candidate, 13,
    ("pfp", "trk", "rangeP", "p_muon"), np.linspace(0, 1, 21),
    "Reconstructed Muon Momentum [GeV/c]", 0.22,
    "Muon Candidate Selection", "muon_candidate_selection_eff.png",
    "---- muon candidate selection ----",
)

# True protons: 2-prong tracks vs. the selected proton candidates.
_plot_candidate_eff(
    mc_trk_df_2prong, mc_p_candidates, 2212,
    ("pfp", "trk", "rangeP", "p_proton"), np.linspace(0, 1, 26),
    "Reconstructed Proton Momentum [GeV/c]", 0.3,
    "Proton Candidate Selection", "proton_candidate_selection_eff.png",
    "---- proton candidate selection ----",
)

# True charged pions surviving as either candidate (here "efficiency" is the
# fraction of pions that are NOT rejected).
both_candidates = pd.concat([mc_p_candidates, mc_mu_candidate])
_plot_candidate_eff(
    mc_trk_df_2prong, both_candidates, 211,
    ("pfp", "trk", "rangeP", "p_pion"), np.linspace(0, 0.5, 21),
    "Reconstructed Pion Momentum [GeV/c]", 0.3,
    "Pion Rejection", "pion_rejection_eff.png",
    "---- pion rejection ----",
    abs_pdg=True,
)

# Results

In [None]:
from variable_configs import VariableConfig

In [None]:
# Move index level 3 of every selected muon/proton track frame (MC, data, intime)
# into an ordinary column.
# NOTE(review): presumably level 3 is the per-track index, so that the muon and
# proton frames end up sharing the same event-level index - confirm.
# NOTE: this is not idempotent; re-running the cell acts on the already-reduced index.
mc_mu_df = mc_mu_df.reset_index(level=[3])
mc_p_df = mc_p_df.reset_index(level=[3])

data_mu_df = data_mu_df.reset_index(level=[3])
data_p_df = data_p_df.reset_index(level=[3])

intime_mu_df = intime_mu_df.reset_index(level=[3])
intime_p_df = intime_p_df.reset_index(level=[3])


In [None]:
save_fig = True

In [None]:
eps = 1e-8

In [None]:
save_fig_dir = "plots"


In [None]:
# Selected-event muon momentum (range-based), drawn once per truth topology
# and once per GENIE interaction mode.
var_config = VariableConfig.muon_momentum()
bins = var_config.bins
evtdf = mc_evt_df_mup

# Clamp every sample into [first edge, last edge - eps] so that out-of-range
# values land in the outermost bins instead of being dropped.
vardf, vardf_data, vardf_intime = (
    np.clip(trk_df.pfp.trk.rangeP.p_muon, bins[0], bins[-1] - eps)
    for trk_df in (mc_mu_df, data_mu_df, intime_mu_df)
)

# x label, y label, title
plot_labels = [var_config.var_labels[1],
               "Events (POT={})".format(pot_str), ""]

hist_by_type = {}
for plot_type in ("topology", "genie"):
    save_name = save_fig_dir + "/selected-muon_momentum_{}.png".format(plot_type)
    hist_by_type[plot_type] = hist_plot(plot_type,
                                        evtdf, vardf,
                                        vardf_data, vardf_intime,
                                        bins,
                                        plot_labels,
                                        save_fig=save_fig, save_name=save_name)
ret_hist_topo = hist_by_type["topology"]
ret_hist_genie = hist_by_type["genie"]

In [None]:
# Selected-event proton momentum (range-based), drawn once per truth topology
# and once per GENIE interaction mode.
var_config = VariableConfig.proton_momentum()
# The binning has to be taken from THIS variable's config before clipping;
# otherwise the clip silently uses whatever `bins` the previously run cell left behind.
bins = var_config.bins
evtdf = mc_evt_df_mup
vardf = mc_p_df.pfp.trk.rangeP.p_proton
vardf_data = data_p_df.pfp.trk.rangeP.p_proton
vardf_intime = intime_p_df.pfp.trk.rangeP.p_proton
# Clamp into [first edge, last edge - eps] so out-of-range values fill the outermost bins.
vardf = np.clip(vardf, bins[0], bins[-1] - eps)
vardf_data = np.clip(vardf_data, bins[0], bins[-1] - eps)
vardf_intime = np.clip(vardf_intime, bins[0], bins[-1] - eps)
# x label, y label, title
plot_labels = [var_config.var_labels[1], 
               "Events (POT={})".format(pot_str), ""]

plot_type = "topology"
save_name = save_fig_dir + "/selected-proton_momentum_{}.png".format(plot_type)
ret_hist_topo = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

plot_type = "genie"
save_name = save_fig_dir + "/selected-proton_momentum_{}.png".format(plot_type)
ret_hist_genie = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

In [None]:
# z component of the reconstructed muon-candidate direction, drawn once per
# truth topology and once per GENIE interaction mode.
var_config = VariableConfig.muon_direction()
bins = var_config.bins
evtdf = mc_evt_df_mup

# Clamp every sample into [first edge, last edge - eps] so that out-of-range
# values land in the outermost bins.
vardf, vardf_data, vardf_intime = (
    np.clip(trk_df.pfp.trk.dir.z, bins[0], bins[-1] - eps)
    for trk_df in (mc_mu_df, data_mu_df, intime_mu_df)
)

# x label, y label, title
plot_labels = [var_config.var_labels[1],
               "Events (POT={})".format(pot_str), ""]

hist_by_type = {}
for plot_type in ("topology", "genie"):
    save_name = save_fig_dir + "/selected-muon_direction_{}.png".format(plot_type)
    hist_by_type[plot_type] = hist_plot(plot_type,
                                        evtdf, vardf,
                                        vardf_data, vardf_intime,
                                        bins,
                                        plot_labels,
                                        save_fig=save_fig, save_name=save_name)
ret_hist_topo = hist_by_type["topology"]
ret_hist_genie = hist_by_type["genie"]

In [None]:
# z component of the reconstructed proton-candidate direction, drawn once per
# truth topology and once per GENIE interaction mode.
var_config = VariableConfig.proton_direction()
bins = var_config.bins
evtdf = mc_evt_df_mup

# Clamp every sample into [first edge, last edge - eps] so that out-of-range
# values land in the outermost bins.
vardf, vardf_data, vardf_intime = (
    np.clip(trk_df.pfp.trk.dir.z, bins[0], bins[-1] - eps)
    for trk_df in (mc_p_df, data_p_df, intime_p_df)
)

# x label, y label, title
plot_labels = [var_config.var_labels[1],
               "Events (POT={})".format(pot_str), ""]

hist_by_type = {}
for plot_type in ("topology", "genie"):
    save_name = save_fig_dir + "/selected-proton_direction_{}.png".format(plot_type)
    hist_by_type[plot_type] = hist_plot(plot_type,
                                        evtdf, vardf,
                                        vardf_data, vardf_intime,
                                        bins,
                                        plot_labels,
                                        save_fig=save_fig, save_name=save_name)
ret_hist_topo = hist_by_type["topology"]
ret_hist_genie = hist_by_type["genie"]

In [None]:
# Column keys of the range-based muon / proton momentum inside the track sub-frame.
# NOTE(review): pad_column_name presumably pads the tuple to the column depth of
# the frame passed as second argument - confirm in pyanalib.pandas_helpers.
P_mu_col = pad_column_name(("rangeP", "p_muon"), mc_mu_df.pfp.trk)
P_p_col = pad_column_name(("rangeP", "p_proton"), mc_p_df.pfp.trk)

# Transverse-kinematic-imbalance variables from the (muon, proton) track pair of
# each sample. The column keys derived from the MC frames are reused for the data
# and intime frames, i.e. all three are assumed to share the same column layout.
# The results are indexed by name below ("del_Tp", "del_p", "del_alpha", "del_phi").
mc_ret_tki = get_cc1p0pi_tki(mc_mu_df.pfp.trk, mc_p_df.pfp.trk, P_mu_col, P_p_col)
data_ret_tki = get_cc1p0pi_tki(data_mu_df.pfp.trk, data_p_df.pfp.trk, P_mu_col, P_p_col)
intime_ret_tki = get_cc1p0pi_tki(intime_mu_df.pfp.trk, intime_p_df.pfp.trk, P_mu_col, P_p_col)

In [None]:
mc_ret_tki.keys()

In [None]:
# TKI variable del_Tp, drawn once per truth topology and once per GENIE mode.
evtdf = mc_evt_df_mup
tki_name = "del_Tp"
# var_config = VariableConfig.tki_del_Tp()
# bins = var_config.bins
# Define the binning BEFORE clipping; otherwise the clip silently uses whatever
# `bins` the previously run cell left behind.
bins = np.linspace(0, 1, 21)
vardf = mc_ret_tki[tki_name]
vardf_data = data_ret_tki[tki_name]
vardf_intime = intime_ret_tki[tki_name]
# Clamp into [first edge, last edge - eps] so out-of-range values fill the outermost bins.
vardf = np.clip(vardf, bins[0], bins[-1] - eps)
vardf_data = np.clip(vardf_data, bins[0], bins[-1] - eps)
vardf_intime = np.clip(vardf_intime, bins[0], bins[-1] - eps)
# x label, y label, title
plot_labels = [r"$\delta p_T$ [GeV/c]",
               "Events (POT={})".format(pot_str), ""]

plot_type = "topology"
save_name = save_fig_dir + "/selected-tki-del_Tp_{}.png".format(plot_type)
ret_hist_topo = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

plot_type = "genie"
save_name = save_fig_dir + "/selected-tki-del_Tp_{}.png".format(plot_type)
ret_hist_genie = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

In [None]:
# TKI variable del_p, drawn once per truth topology and once per GENIE mode.
evtdf = mc_evt_df_mup
tki_name = "del_p"
# var_config = VariableConfig.tki_del_p()
# bins = var_config.bins
# Define the binning BEFORE clipping; otherwise the clip silently uses whatever
# `bins` the previously run cell left behind.
bins = np.linspace(0, 1, 21)
vardf = mc_ret_tki[tki_name]
vardf_data = data_ret_tki[tki_name]
vardf_intime = intime_ret_tki[tki_name]
# Clamp into [first edge, last edge - eps] so out-of-range values fill the outermost bins.
vardf = np.clip(vardf, bins[0], bins[-1] - eps)
vardf_data = np.clip(vardf_data, bins[0], bins[-1] - eps)
vardf_intime = np.clip(vardf_intime, bins[0], bins[-1] - eps)
# x label, y label, title
plot_labels = [r"$\delta p$ [GeV/c]",
               "Events (POT={})".format(pot_str), ""]

plot_type = "topology"
save_name = save_fig_dir + "/selected-tki-del_p_{}.png".format(plot_type)
ret_hist_topo = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

plot_type = "genie"
save_name = save_fig_dir + "/selected-tki-del_p_{}.png".format(plot_type)
ret_hist_genie = hist_plot(plot_type,
                          evtdf, vardf, 
                          vardf_data, vardf_intime,
                          bins, 
                          plot_labels,
                          save_fig=save_fig, save_name=save_name)

In [None]:
vardf = mc_ret_tki[tki_name] * 180 / np.pi
vardf
bins

In [None]:
# TKI variable del_alpha, scaled by 180/pi (radians -> degrees, matching the
# "[deg]" axis label), drawn once per truth topology and once per GENIE mode.
evtdf = mc_evt_df_mup
tki_name = "del_alpha"
var_config = VariableConfig.tki_del_alpha()
bins = var_config.bins

# Convert each sample, then clamp into [first edge, last edge - eps] so that
# out-of-range values land in the outermost bins.
vardf, vardf_data, vardf_intime = (
    np.clip(tki_ret[tki_name] * 180 / np.pi, bins[0], bins[-1] - eps)
    for tki_ret in (mc_ret_tki, data_ret_tki, intime_ret_tki)
)

# x label, y label, title
plot_labels = [r"$\delta \alpha_T$ [deg]",
               "Events (POT={})".format(pot_str), ""]

hist_by_type = {}
for plot_type in ("topology", "genie"):
    save_name = save_fig_dir + "/selected-tki-del_alpha_{}.png".format(plot_type)
    hist_by_type[plot_type] = hist_plot(plot_type,
                                        evtdf, vardf,
                                        vardf_data, vardf_intime,
                                        bins,
                                        plot_labels,
                                        save_fig=save_fig, save_name=save_name)
ret_hist_topo = hist_by_type["topology"]
ret_hist_genie = hist_by_type["genie"]

In [None]:
# TKI variable del_phi, scaled by 180/pi (radians -> degrees, matching the
# "[deg]" axis label), drawn once per truth topology and once per GENIE mode.
evtdf = mc_evt_df_mup
tki_name = "del_phi"
var_config = VariableConfig.tki_del_phi()
bins = var_config.bins

# Convert each sample, then clamp into [first edge, last edge - eps] so that
# out-of-range values land in the outermost bins.
vardf, vardf_data, vardf_intime = (
    np.clip(tki_ret[tki_name] * 180 / np.pi, bins[0], bins[-1] - eps)
    for tki_ret in (mc_ret_tki, data_ret_tki, intime_ret_tki)
)

# x label, y label, title
plot_labels = [r"$\delta \phi_T$ [deg]",
               "Events (POT={})".format(pot_str), ""]

hist_by_type = {}
for plot_type in ("topology", "genie"):
    save_name = save_fig_dir + "/selected-tki-del_phi_{}.png".format(plot_type)
    hist_by_type[plot_type] = hist_plot(plot_type,
                                        evtdf, vardf,
                                        vardf_data, vardf_intime,
                                        bins,
                                        plot_labels,
                                        save_fig=save_fig, save_name=save_name)
ret_hist_topo = hist_by_type["topology"]
ret_hist_genie = hist_by_type["genie"]

## Kinematic Reconstruction

### Vertex Position

In [None]:
# Vertex reconstruction performance for neutrino slices: (reco - true) vertex
# position along each detector axis.
# The neutrino-slice selection and the binning do not depend on the axis, so they
# are evaluated once instead of being recomputed inside the loop.
nu_slc_df = mc_evt_df[IsNu(mc_evt_df)]
bins = np.linspace(-5, 5, 101)
# bins = 100

fig, axs = plt.subplots(1,3, figsize=(16,4))
for this_ax, axis in zip(axs, ["x", "y", "z"]):
    var = nu_slc_df.slc.vertex[axis] - nu_slc_df.slc.truth.position[axis]
    this_ax.hist(var, bins=bins)
    this_ax.set_xlabel("(Reco-True) in {} Coordinate [cm]".format(axis))
    this_ax.set_ylabel("Slices")

plt.suptitle("(Reconstructed Slice Vertex - True Interaction Position) of Neutrino Events", y=1.02)
save_name = save_fig_dir + "/vertex_reco_res_3d.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

### Momentum

In [None]:
import matplotlib.colors as mcolors
from matplotlib.transforms import Bbox

In [None]:
# Add two truth-level columns to the selected MC muon and proton track frames:
#   truth.p.totp  - magnitude of the generated momentum vector (genp x, y, z)
#   truth.p.dir.z - z component of the generated direction, genp.z / |genp|
truth_p_key = ("pfp", "trk", "truth", "p")
for trk_df in (mc_mu_df, mc_p_df):
    genp_sq = sum(trk_df[truth_p_key + ("genp", comp)]**2 for comp in ("x", "y", "z"))
    trk_df[truth_p_key + ("totp", "")] = np.sqrt(genp_sq)
    trk_df[truth_p_key + ("dir", "z")] = trk_df[truth_p_key + ("genp", "z")] / trk_df[truth_p_key + ("totp", "")]

In [None]:
mc_mu_df.pfp.trk.truth.p

In [None]:
# Range-based vs. true momentum for the selected muon candidates that are
# truth-matched to a muon (PDG 13). Top: 2D reco-vs-true map. Bottom: fractional
# residual (median +- std) in slices of reconstructed momentum.
true_mu_trk = mc_mu_df[mc_mu_df.pfp.trk.truth.p.pdg == 13]
var_reco = true_mu_trk.pfp.trk.rangeP.p_muon
var_true = true_mu_trk.pfp.trk.truth.p.totp
plot_labels = ["Reconstructed $P_{\\mu}$ [GeV/c]", "True $P_{\\mu}$ [GeV/c]", "True Muons of 2-prong Slices"]
plot_labels_frac = ["$(P_{\\mu}^{reco} - P_{\\mu}^{true}) / P_{\\mu}^{true}$"]
bins = np.linspace(0.22, 1, 41)

fig, ax = plt.subplots(2,1, figsize=(7,8), height_ratios=[4,1.2])
ax_map, ax_res = ax

# --- top panel: reco vs. true with the diagonal as a guide ---
im = ax_map.hist2d(var_reco, var_true, bins=bins, norm=mcolors.LogNorm(), cmap="viridis")
ax_map.set_xlabel(plot_labels[0])
ax_map.set_ylabel(plot_labels[1])
ax_map.plot(bins, bins, color="red", linestyle="--", alpha=0.5)
ax_map.set_title(plot_labels[2])
plt.colorbar(im[3], ax=ax_map, label="Tracks")

# --- bottom panel: per-slice fractional residual ---
# mean_list actually holds the median of each slice; sig_list its standard deviation.
mean_list, sig_list = [], []
for lo_edge, hi_edge in zip(bins[:-1], bins[1:]):
    this_cut = (var_reco > lo_edge) & (var_reco < hi_edge)
    reco_sel, true_sel = var_reco[this_cut], var_true[this_cut]
    this_diff = (reco_sel - true_sel) / true_sel
    # drop NaN / +-inf (e.g. zero true momentum), then limit outliers to +-100%
    this_diff = this_diff[np.isfinite(this_diff)]
    this_diff = np.clip(this_diff, -1, 1)
    mean_list.append(np.median(this_diff))
    sig_list.append(np.std(this_diff))
bin_centers = (bins[:-1] + bins[1:])/2
ax_res.errorbar(bin_centers, mean_list, yerr=sig_list, fmt="o", color="black")

# Give the residual panel the same x range, left edge and width as the map
# (the colorbar would otherwise make the top panel narrower).
ax_res.set_xlim(ax_map.get_xlim())
pos0 = ax_map.get_position()
pos1 = ax_res.get_position()
ax_res.set_position([pos0.x0, pos1.y0, pos0.width, pos1.height])
ax_res.axhline(0, color="red", linestyle="--", alpha=0.5)
ax_res.set_xlabel(plot_labels[0])
ax_res.set_ylabel(plot_labels_frac[0])

save_name = save_fig_dir + "/kin_reco-P_res-mu_rangeP.pdf"
if save_fig:
    plt.savefig(save_name, bbox_inches="tight")
plt.show();

In [None]:
# Range-based vs. true momentum for the selected proton candidates that are
# truth-matched to a proton (PDG 2212). Top: 2D reco-vs-true map. Bottom:
# fractional residual (median +- std) in slices of reconstructed momentum.
true_p_trk = mc_p_df[mc_p_df.pfp.trk.truth.p.pdg == 2212]
var_reco = true_p_trk.pfp.trk.rangeP.p_proton
var_true = true_p_trk.pfp.trk.truth.p.totp
plot_labels = ["Reconstructed $P_{p}$ [GeV/c]", "True $P_{p}$ [GeV/c]", "True Protons of 2-prong Slices"]
plot_labels_frac = ["$(P_{p}^{reco} - P_{p}^{true}) / P_{p}^{true}$"]
bins = np.linspace(0.3, 1, 41)

fig, ax = plt.subplots(2,1, figsize=(7,8), height_ratios=[4,1.2])
ax_map, ax_res = ax

# --- top panel: reco vs. true with the diagonal as a guide ---
im = ax_map.hist2d(var_reco, var_true, bins=bins, norm=mcolors.LogNorm(), cmap="viridis")
ax_map.set_xlabel(plot_labels[0])
ax_map.set_ylabel(plot_labels[1])
ax_map.plot(bins, bins, color="red", linestyle="--", alpha=0.5)
ax_map.set_title(plot_labels[2])
plt.colorbar(im[3], ax=ax_map, label="Tracks")

# --- bottom panel: per-slice fractional residual ---
# mean_list actually holds the median of each slice; sig_list its standard deviation.
mean_list, sig_list = [], []
for lo_edge, hi_edge in zip(bins[:-1], bins[1:]):
    this_cut = (var_reco > lo_edge) & (var_reco < hi_edge)
    reco_sel, true_sel = var_reco[this_cut], var_true[this_cut]
    this_diff = (reco_sel - true_sel) / true_sel
    # drop NaN / +-inf (e.g. zero true momentum), then limit outliers to +-100%
    this_diff = this_diff[np.isfinite(this_diff)]
    this_diff = np.clip(this_diff, -1, 1)
    mean_list.append(np.median(this_diff))
    sig_list.append(np.std(this_diff))
bin_centers = (bins[:-1] + bins[1:])/2
ax_res.errorbar(bin_centers, mean_list, yerr=sig_list, fmt="o", color="black")

# Give the residual panel the same x range, left edge and width as the map
# (the colorbar would otherwise make the top panel narrower).
ax_res.set_xlim(ax_map.get_xlim())
pos0 = ax_map.get_position()
pos1 = ax_res.get_position()
ax_res.set_position([pos0.x0, pos1.y0, pos0.width, pos1.height])
ax_res.axhline(0, color="red", linestyle="--", alpha=0.5)
ax_res.set_xlabel(plot_labels[0])
ax_res.set_ylabel(plot_labels_frac[0])

save_name = save_fig_dir + "/kin_reco-P_res-p_rangeP.pdf"
if save_fig:
    plt.savefig(save_name, bbox_inches="tight")
plt.show();

### Direction

In [None]:
# Reconstructed vs. true angle to the z axis (degrees) for the selected muon
# candidates that are truth-matched to a muon (PDG 13). Top: 2D reco-vs-true
# map. Bottom: residual (median +- std) in slices of reconstructed angle.
var_reco = np.arccos(mc_mu_df[mc_mu_df.pfp.trk.truth.p.pdg == 13].pfp.trk.dir.z) * 180/np.pi
var_true = np.arccos(mc_mu_df[mc_mu_df.pfp.trk.truth.p.pdg == 13].pfp.trk.truth.p.dir.z) * 180/np.pi
plot_labels = ["Reconstructed $\\theta_{\\mu}$ [deg]", "True $\\theta_{\\mu}$ [deg]", "True Muons of 2-prong Slices"]
plot_labels_frac = ["$\\theta_{\\mu}^{reco} - \\theta_{\\mu}^{true}$"]
# bins = np.linspace(0, np.pi, 41)
bins = np.linspace(0, 180, 41)

fig, ax = plt.subplots(2,1, figsize=(7,8), height_ratios=[4,1.2])

im = ax[0].hist2d(var_reco, var_true, bins=bins, norm=mcolors.LogNorm(), cmap="viridis")
ax[0].set_xlabel(plot_labels[0])
ax[0].set_ylabel(plot_labels[1])
ax[0].plot(bins, bins, color="red", linestyle="--", alpha=0.5)
ax[0].set_title(plot_labels[2])
plt.colorbar(im[3], ax=ax[0], label="Tracks")

# mean_list actually holds the median of each slice; sig_list its standard deviation.
mean_list = []
sig_list = []
for i in range(len(bins)-1):
    this_cut = (var_reco > bins[i]) & (var_reco < bins[i+1])
    this_diff = (var_reco[this_cut] - var_true[this_cut])
    this_diff = this_diff[np.isfinite(this_diff)]
    # No clipping here: the +-1 window used for the fractional momentum residual
    # would mean +-1 degree for this absolute angle difference and would flatten
    # both the median and the spread.
    mean_list.append(np.median(this_diff))
    sig_list.append(np.std(this_diff))
bin_centers = (bins[:-1] + bins[1:])/2
ax[1].errorbar(bin_centers, mean_list, yerr=sig_list, fmt="o", color="black")

# Give the residual panel the same x range, left edge and width as the map
# (the colorbar would otherwise make the top panel narrower).
ax[1].set_xlim(ax[0].get_xlim())
pos0 = ax[0].get_position()
pos1 = ax[1].get_position()
ax[1].set_position([pos0.x0, pos1.y0, pos0.width, pos1.height])
ax[1].axhline(0, color="red", linestyle="--", alpha=0.5)
ax[1].set_xlabel(plot_labels[0])
ax[1].set_ylabel(plot_labels_frac[0])

save_name = save_fig_dir + "/kin_reco-thetaz_res-mu.pdf"
if save_fig:
    plt.savefig(save_name, bbox_inches="tight")
plt.show();

In [None]:
# Reconstructed vs. true angle to the z axis (degrees) for the selected proton
# candidates that are truth-matched to a proton (PDG 2212). Top: 2D reco-vs-true
# map. Bottom: residual (median +- std) in slices of reconstructed angle.
var_reco = np.arccos(mc_p_df[mc_p_df.pfp.trk.truth.p.pdg == 2212].pfp.trk.dir.z) * 180/np.pi
var_true = np.arccos(mc_p_df[mc_p_df.pfp.trk.truth.p.pdg == 2212].pfp.trk.truth.p.dir.z) * 180/np.pi
plot_labels = ["Reconstructed $\\theta_{p}$ [deg]", "True $\\theta_{p}$ [deg]", "True Protons of 2-prong Slices"]
plot_labels_frac = ["$\\theta_{p}^{reco} - \\theta_{p}^{true}$"]
bins = np.linspace(0, 180, 41)

fig, ax = plt.subplots(2,1, figsize=(7,8), height_ratios=[4,1.2])

im = ax[0].hist2d(var_reco, var_true, bins=bins, norm=mcolors.LogNorm(), cmap="viridis")
ax[0].set_xlabel(plot_labels[0])
ax[0].set_ylabel(plot_labels[1])
ax[0].plot(bins, bins, color="red", linestyle="--", alpha=0.5)
ax[0].set_title(plot_labels[2])
plt.colorbar(im[3], ax=ax[0], label="Tracks")

# mean_list actually holds the median of each slice; sig_list its standard deviation.
mean_list = []
sig_list = []
for i in range(len(bins)-1):
    this_cut = (var_reco > bins[i]) & (var_reco < bins[i+1])
    this_diff = (var_reco[this_cut] - var_true[this_cut])
    this_diff = this_diff[np.isfinite(this_diff)]
    # No clipping here: the +-1 window used for the fractional momentum residual
    # would mean +-1 degree for this absolute angle difference and would flatten
    # both the median and the spread.
    mean_list.append(np.median(this_diff))
    sig_list.append(np.std(this_diff))
bin_centers = (bins[:-1] + bins[1:])/2
ax[1].errorbar(bin_centers, mean_list, yerr=sig_list, fmt="o", color="black")

# Give the residual panel the same x range, left edge and width as the map
# (the colorbar would otherwise make the top panel narrower).
ax[1].set_xlim(ax[0].get_xlim())
pos0 = ax[0].get_position()
pos1 = ax[1].get_position()
ax[1].set_position([pos0.x0, pos1.y0, pos0.width, pos1.height])
ax[1].axhline(0, color="red", linestyle="--", alpha=0.5)
ax[1].set_xlabel(plot_labels[0])
ax[1].set_ylabel(plot_labels_frac[0])

save_name = save_fig_dir + "/kin_reco-thetaz_res-p.pdf"
if save_fig:
    plt.savefig(save_name, bbox_inches="tight")
plt.show();

# Summary

In [None]:
# Human-readable label of each selection stage, in the order the cuts are applied.
# The list is used positionally by the summary cells (reversed as y tick labels,
# and indexed by the enumeration of df_dict keys), so its length and order have to
# match the stage dictionaries.
# The thresholds (nu_score_th, trackscore_th, dist_th) are defined elsewhere in
# the notebook and are formatted into the labels here.
stage_labels = [
    "All reconstructed slices",
    "Not clear cosmic",
    "Vertex in fiducial volume",
    "Nu-score > {}".format(nu_score_th),
    "Has exactly 2 PFPs",
    "Both PFPs contained",
    "Both PFPs have track score > {}".format(trackscore_th),
    "Both track \n(start position - vertex) < {} cm".format(dist_th),
    "One track is muon-like",
    "The other is proton-like"
]

In [None]:
# Summary figure: for every selection stage, two horizontal stacked bars giving
# the composition of the surviving slices (one by truth topology, one by GENIE mode).
from matplotlib.patches import Patch


def _stacked_barh(axis, y_pos, data, colors, width, first_label):
    """Stack the columns of ``data`` left to right at ``y_pos``; return [first segment]."""
    first_segment = []
    running_left = np.zeros(len(y_pos))
    for col_idx in range(data.shape[1]):
        # fall back to matplotlib's default colour when the palette is too short
        seg_color = colors[col_idx] if col_idx < len(colors) else None
        segment = axis.barh(
            y_pos,
            data[:, col_idx],
            width,
            left=running_left,
            color=seg_color,
            label=first_label if col_idx == 0 else None
        )
        if col_idx == 0:
            first_segment.append(segment)
        running_left = running_left + data[:, col_idx]
    return first_segment


# Stages are drawn bottom-to-top, so reverse them to read top-to-bottom.
stages = list(perc_dict_topo.keys())[::-1]
n_stages = len(stages)
bar_width = 0.3
y = np.arange(n_stages)

# One row per stage, one column per category; the category order (and the
# matching colour lists) is reversed so the stacking order is flipped.
topo_data = np.array([perc_dict_topo[stage] for stage in stages])[:, ::-1]
genie_data = np.array([perc_dict_genie[stage] for stage in stages])[:, ::-1]
topology_colors_flipped = topology_colors[::-1]
genie_colors_flipped = genie_colors[::-1]

fig, ax = plt.subplots(figsize=(10, 10))

# Topology bar sits just below each tick, GENIE bar just above it.
topo_handles = _stacked_barh(ax, y - bar_width/2, topo_data,
                             topology_colors_flipped, bar_width,
                             "Topology (unhatched)")
genie_handles = _stacked_barh(ax, y + bar_width/2, genie_data,
                              genie_colors_flipped, bar_width,
                              "GENIE (light hatch)")

ax.set_yticks(y)
ax.set_yticklabels(stage_labels[::-1], rotation=0, va='center', fontsize=12)
ax.set_xlabel("Percentage (%)")

plt.tight_layout()

# Legends are built from explicit colour patches, in three rows: categories
# common to both breakdowns, GENIE-only categories, topology-only categories.
common_colors = ["gray", "sienna", "crimson", "darkgreen"]
common_modes = ["Cosmic", r"Out FV $\nu$", r"In FV other $\nu$", r"In FV $\nu_{\mu}$ NC"]
common_handles = [Patch(facecolor=c, label=m) for c, m in zip(common_colors, common_modes)]

genie_spec_colors = ["#BFB17C", "#D88A3B", "#2c7c94", "#390C1E", "#9b5580"]
genie_spec_modes = [r"In FV $\nu_{\mu}$ CC Other", r"In FV $\nu_{\mu}$ CC SIS/DIS", r"In FV $\nu_{\mu}$ CC RES", r"In FV $\nu_{\mu}$ CC MEC", r"In FV $\nu_{\mu}$ CC QE"]
genie_spec_handles = [Patch(facecolor=c, label=m) for c, m in zip(genie_spec_colors, genie_spec_modes)]

topo_spec_colors = ["coral", "darkslateblue", "mediumslateblue"]
topo_spec_modes = [r"In FV $\nu_{\mu}$ CC Other", r"In FV $\nu_{\mu}$ CC Np0$\pi$", r"In FV $\nu_{\mu}$ CC 1p0$\pi$"]
topo_spec_handles = [Patch(facecolor=c, label=m) for c, m in zip(topo_spec_colors, topo_spec_modes)]

# An axes holds one legend at a time, so the extra rows live on twin axes.
ax.legend(handles=common_handles, loc='upper left',
          bbox_to_anchor=(0.01, 1.18), ncol=4, fontsize=12, frameon=False)
ax_1 = ax.twinx()
ax_1.legend(handles=genie_spec_handles[::-1], loc='upper left',
            bbox_to_anchor=(0.01, 1.14), ncol=3, fontsize=12, frameon=False)
ax_2 = ax.twinx()
ax_2.legend(handles=topo_spec_handles[::-1], loc='upper left',
            bbox_to_anchor=(0.01, 1.07), ncol=4, fontsize=12, frameon=False)
ax_1.set_yticks([])
ax_2.set_yticks([])

save_name = save_fig_dir + "/summary-selected_slice_breakdown.png"
if save_fig:
    plt.savefig(save_name, dpi=300, bbox_inches="tight")

plt.show()

In [None]:
print(df_dict.keys())

In [None]:
def eff_err(num, den):
    """
    Binomial statistical error on an efficiency.

    Computes sqrt(eff * (1 - eff) / den) with eff = num / den, element-wise
    when array-like inputs are given.

    Parameters
    ----------
    num : scalar or array-like
        Number of events passing the selection (e.g. per bin).
    den : scalar or array-like
        Total number of events the selection was applied to.

    Returns
    -------
    scalar or array-like
        The binomial error. Entries are NaN where the efficiency is undefined
        (den == 0) or where the argument of the square root is negative
        (e.g. num > den); no exception or floating-point warning is raised
        for those entries.
    """
    # np.true_divide (instead of "/") makes plain Python numbers follow the
    # IEEE rules too, so den == 0 yields inf/NaN rather than ZeroDivisionError.
    with np.errstate(divide="ignore", invalid="ignore"):
        eff = np.true_divide(num, den)
        return np.sqrt(np.true_divide(eff * (1 - eff), den))

# def eff_err_prop(num_list, den_list):
#     err_list = []
#     for num, den in zip(num_list, den_list):
#         err_list.append(eff_err(num, den))

In [None]:
# Signal efficiency versus muon momentum, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("mu", "totp")
var_config = VariableConfig.muon_momentum()
var_savename = var_config.var_save_name
xlabel = var_config.var_labels[0]
bins = var_config.bins

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for kidx, (key, n) in enumerate(stage_counts.items()):
    this_color = "C" + str(kidx)
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key], color=this_color)
    # optional binomial error bars:
    # ax_eff.errorbar(bin_centers, this_eff, yerr=eff_err(n, tot_rate), fmt="o", color=this_color)

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the z component of the muon direction, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("mu", "dir", "z")
var_config = VariableConfig.muon_direction()
var_savename = var_config.var_save_name
xlabel = var_config.var_labels[0]
bins = var_config.bins

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus proton momentum, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("p", "totp")
var_config = VariableConfig.proton_momentum()
var_savename = var_config.var_save_name
xlabel = var_config.var_labels[0]
bins = var_config.bins

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(p_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the z component of the proton direction, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("p", "dir", "z")
var_config = VariableConfig.proton_direction()
var_savename = var_config.var_save_name
xlabel = var_config.var_labels[0]
bins = var_config.bins

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the "del_Tp" variable, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("del_Tp", "", "")
# NOTE(review): var_config is not used below; save name, label and binning are hard-coded
var_config = VariableConfig.tki_del_Tp()
var_savename = "del_Tp"
xlabel = r"$\delta p_T$ [GeV/c]"
bins = np.linspace(0, 1, 21)

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the "del_p" variable, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("del_p", "", "")
# NOTE(review): this builds the del_Tp config although the cell plots del_p (looks like
# a copy-paste leftover). var_config is not used below, so the plot is unaffected.
var_config = VariableConfig.tki_del_Tp()
var_savename = "del_p"
xlabel = r"$\delta p$ [GeV/c]"
bins = np.linspace(0, 1, 21)

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the "del_alpha" variable, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("del_alpha", "", "")
# NOTE(review): var_config is not used below; save name, label and binning are hard-coded
var_config = VariableConfig.tki_del_alpha()
var_savename = "del_alpha"
xlabel = r"$\delta \alpha$ [rad]"
bins = np.linspace(0, np.pi, 21)

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Signal efficiency versus the "del_phi" variable, stage by stage.
# Left axis: signal event counts after each selection stage;
# right axis: per-bin efficiency relative to the "allreco" stage.
var_name = ("del_phi", "", "")
# NOTE(review): var_config is not used below; save name, label and binning are hard-coded
var_config = VariableConfig.tki_del_phi()
var_savename = "del_phi"
xlabel = r"$\delta \phi$ [rad]"
bins = np.linspace(0, np.pi, 21)

fig, ax = plt.subplots()
ax_eff = ax.twinx()

# Denominator of the integrated efficiency quoted in the legend labels.
tot_evts = len(df_dict["allreco"][IsNuInFV_NumuCC_1p0pi(df_dict["allreco"])])

# Pass 1: histogram the signal events of every stage. Counts are stored per key so
# the efficiency below does not rely on "allreco" being the first entry of df_dict
# (or on a stale `tot_rate` left over from another cell).
stage_counts = {}
stage_legend = {}
for kidx, (key, stage_df) in enumerate(df_dict.items()):
    stage_df = stage_df[IsNuInFV_NumuCC_1p0pi(stage_df)]
    if key == "2prong-mup":
        # signal events over all events passing the "2prong-mup" stage
        print("final purity: {:.2f}%".format(100 * len(stage_df) / len(df_dict["2prong-mup"])))
    # append integrated efficiency to the label
    # NOTE(review): assumes stage_labels is ordered like df_dict — confirm
    int_eff = len(stage_df) / tot_evts
    this_label = stage_labels[kidx] + " ({:.2f}%)".format(int_eff*100)
    # ax.hist returns the edges it used; rebinding `bins` keeps all stages on one binning
    n, bins, _ = ax.hist(stage_df[var_name], bins=bins, histtype="step", label=this_label, alpha=0.6)
    stage_counts[key] = n
    stage_legend[key] = this_label

# Pass 2: per-bin efficiency with respect to the "allreco" signal counts.
tot_rate = stage_counts["allreco"]
bin_centers = (bins[:-1] + bins[1:])/2
for key, n in stage_counts.items():
    # bins with an empty denominator have no defined efficiency: leave them as NaN gaps
    this_eff = np.divide(n, tot_rate, out=np.full(n.shape, np.nan), where=tot_rate > 0)
    ax_eff.plot(bin_centers, this_eff, label=stage_legend[key])

ax.set_xlabel(xlabel)
ax.set_ylabel("Events")
ax_eff.set_ylabel("Efficiency")
ax_eff.set_ylim(0, 1.05)

# plt.axvline(mu_low_th, color="red", linestyle="--")
ax_eff.set_xlim(bins[0], bins[-1])  # x axis is shared with `ax` through twinx
ax_eff.legend(bbox_to_anchor=(1.15, 1.0))
if save_fig:
    # build the path only when saving, so save_fig_dir is needed only in that case
    save_name = save_fig_dir + "/efficiency_plot_{}.png".format(var_savename)
    plt.savefig(save_name, dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Display the rows of `this_df` with nmu_27MeV == 1 and np_50MeV, npi_30MeV and npi0 all
# equal to 0, showing only those four counting columns.
# NOTE(review): `this_df` is not defined in this cell — presumably it is the frame built in
# the "final state topology detailed breakdown" cell, which must be run first; confirm.
this_df[(this_df.nmu_27MeV == 1) & (this_df.np_50MeV == 0) & (this_df.npi_30MeV == 0) & (this_df.npi0 == 0)][["nmu_27MeV", "np_50MeV", "npi_30MeV", "npi0"]]

In [None]:
# final state topology detailed breakdown
# Prints every (nmu_27MeV, np_50MeV, npi_30MeV, npi0) combination found among the events
# selected by IsNuInFV_NumuCC_Np0pi, followed by the share of each combination.
from collections import Counter

this_df = mc_evt_df[IsNuInFV_NumuCC_Np0pi(mc_evt_df)]
topo_cols = ["nmu_27MeV", "np_50MeV", "npi_30MeV", "npi0"]

# Count all combinations in a single pass over the rows. A Counter keeps its keys in
# order of first appearance, i.e. the same order drop_duplicates() would give, and
# working on plain arrays avoids label-based row lookups on a possibly non-unique index.
combo_counts = Counter(
    zip(*([int(val) for val in np.ravel(this_df[col].to_numpy())] for col in topo_cols))
)
unique_combinations_list = list(combo_counts)
print(unique_combinations_list)

# Share of each combination. The denominator is the number of ALL events in mc_evt_df
# (not only the selected ones), so the shares sum to the selected fraction of the sample.
perc_list = []
for uc, n_evts in combo_counts.items():
    print(uc)
    perc = n_evts/len(mc_evt_df)
    print(perc*100)
    perc_list.append(perc)
print(perc_list)

print(sum(perc_list))

In [None]:
# Display the `pdg` column for the events selected by IsNuInFV_NuOther.
mc_evt_df[IsNuInFV_NuOther(mc_evt_df)].pdg

In [None]:
# Display the four final-state counting columns for the events selected by
# IsNuInFV_NumuCC_Other.
mc_evt_df[IsNuInFV_NumuCC_Other(mc_evt_df)][["nmu_27MeV", "np_50MeV", "npi_30MeV", "npi0"]]