In [36]:
import sys, os
import getpass

import warnings
import numpy as np
import math
import uproot as uproot
import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors as mcolors
import matplotlib.colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import ticker
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from matplotlib import gridspec
import matplotlib.cm as cm
import dunestyle.matplotlib as dunestyle

import scipy.stats as stats
from scipy.stats import norm
from scipy.stats import median_abs_deviation
from scipy.interpolate import CubicSpline
from scipy.optimize import curve_fit
import scipy.linalg as la
import scipy.optimize as opt
from scipy.optimize import Bounds, LinearConstraint
from scipy.stats import chisquare

# Add the head directory to sys.path
workspace_root = os.getcwd()  
sys.path.insert(0, workspace_root + "/../../../")

import pyanalib.cc2p_reco_var as cc2preco
import pyanalib.pandas_helpers as ph
import pyanalib.stat_helpers as sh

np.seterr(divide='ignore', invalid='ignore', over='ignore')

user = getpass.getuser()
print(user)

show_plots = False

apapadop


# samples to be used

In [37]:
# dnn fall val I
mc_file = "/exp/sbnd/data/users/apapadop/dfs/v10_10_03_02/mc_MCP2025_FallValidation_DNN_prodgenie_corsika_proton_rockbox_sbnd_CV_caf_flat_caf_sbnd_150k.df"
mc_intime_file = "/exp/sbnd/data/users/apapadop/dfs/v10_10_03_02/mc_MCP2025_FallValidation_prodcorsika_proton_intime_sbnd_CV_caf_flat_caf_sbnd_1000.df"
data_file = "/exp/sbnd/data/users/apapadop/dfs/v10_10_03_02/data_MCP2025B_FullRun1_RollingDev_DNN_bnblight_v10_10_03_02_flatcaf_sbnd.df"

In [38]:
with pd.HDFStore(mc_file, "r") as store:
    print(store.keys())

mc_hdr_df = pd.read_hdf(mc_file, key='hdr_0')
mc_pot_df = pd.read_hdf(mc_file, key='pot_0')
mc_reco_df_ = pd.read_hdf(mc_file, key='evt_0')
mc_nudf_ = pd.read_hdf(mc_file, key='mcnu_0')

['/evt_0', '/hdr_0', '/mcnu_0', '/opflash_0', '/pot_0', '/split']


In [39]:
with pd.HDFStore(mc_intime_file, "r") as store:
    print(store.keys())

mc_intime_hdr_df = pd.read_hdf(mc_intime_file, key='hdr_0')

['/evt_0', '/hdr_0', '/mcnu_0', '/opflash_0', '/pot_0', '/split']


In [40]:
mc_intime_pot_df = pd.read_hdf(mc_intime_file, key='pot_0')

In [41]:
mc_intime_reco_df_ = pd.read_hdf(mc_intime_file, key='evt_0')

In [42]:
data_hdr_df = pd.read_hdf(data_file, key='hdr_0')
data_pot_df = pd.read_hdf(data_file, key='pot_0')
data_reco_df_ = pd.read_hdf(data_file, key='evt_0')

with pd.HDFStore(data_file, "r") as store:
    print(store.keys())

['/evt_0', '/hdr_0', '/mcnu_0', '/pot_0', '/split']


In [43]:
code_version = "v10_10_03_02"
sample_str = "2025B (" + code_version + ")"

In [44]:
mc_nudf_ = pd.read_hdf(mc_file, key='mcnu_0')

In [45]:
## POT and Gate accounting
#### == POT
mc_tot_pot = mc_hdr_df['pot'].sum()
data_tot_pot = data_hdr_df['pot'].sum()
data_tot_TOR860 = data_pot_df['TOR860'].sum()
data_tot_TOR875 = data_pot_df['TOR875'].sum()

#### == Gate
f_factor = 0.0754 ## -- page 6 of https://sbn-docdb.fnal.gov/cgi-bin/sso/RetrieveFile?docid=41013&filename=2025-05-08_Absolute-Normalization-Data-MC.pdf&version=4
data_gates = len(data_pot_df)
intime_gates = ngenevt_sum = mc_intime_hdr_df[mc_intime_hdr_df['first_in_subrun'] == 1]['ngenevt'].sum()

##### hack !!!!!
data_tot_pot = 4.5e18
data_tot_TOR860 = 4.5e18
data_tot_TOR875 = 4.5e18
data_gates = 9.55e5

print("mc_tot_pot: %e" %(mc_tot_pot))
print("data_tot_pot: %e" %(data_tot_pot))
print("data_tot_TOR860: %e" %(data_tot_TOR860))
print("data_tot_TOR875: %e" %(data_tot_TOR875))
target_pot = data_tot_pot
mc_pot_scale = target_pot / mc_tot_pot
print("target pot: %e" % (target_pot))
print("MC POT scale: %.3f" %(mc_pot_scale))

print("data_gates: %e" %(data_gates))
print("intime_gates: %e" %(intime_gates))
mc_gate_scale = (1. - f_factor) * (data_gates + 0.) / (intime_gates + 0.)
print("MC gate scale: %.3f" %(mc_gate_scale))

mc_tot_pot: 2.919240e+18
data_tot_pot: 4.500000e+18
data_tot_TOR860: 4.500000e+18
data_tot_TOR875: 4.500000e+18
target pot: 4.500000e+18
MC POT scale: 1.541
data_gates: 9.550000e+05
intime_gates: 1.000000e+05
MC gate scale: 8.830


In [46]:
#data_hdr_df.run.value_counts()

In [47]:
def add_n_slice_col(reco_df):
    df_reset = reco_df.reset_index()
    slc_counts = (
        df_reset[['__ntuple', 'entry', 'rec.slc..index']]
        .drop_duplicates()
        .groupby(['__ntuple', 'entry'])
        .size()
        .reset_index(name='n_slc_per_entry')
    )

    slc_counts.columns = pd.MultiIndex.from_tuples([
        ('__ntuple', '', '', '', '', ''),
        ('entry', '', '', '', '', ''),
        ('slc', 'n_slc_per_entry', '', '', '', '')
    ])
    df_reset = df_reset.merge(slc_counts, on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', '')])
    df_reset = df_reset.set_index(["__ntuple", "entry", "rec.slc..index", "rec.slc.reco.pfp..index"], verify_integrity=True)
    return df_reset 

In [48]:
def get_n_recopid_per_slc(df):
    pid_series = df.pfp.trk.reco_pid
    this_df = pid_series.reset_index()

    muons = this_df[this_df["reco_pid"] == 13]
    protons = this_df[this_df["reco_pid"] == 2212]
    pions = this_df[this_df["reco_pid"] == 211]    

    muon_counts = muons.groupby(["__ntuple", "entry", "rec.slc..index"]).size().rename("n_mu")
    proton_counts = protons.groupby(["__ntuple", "entry", "rec.slc..index"]).size().rename("n_proton")
    pion_counts = pions.groupby(["__ntuple", "entry", "rec.slc..index"]).size().rename("n_pion")    

    this_df = this_df.merge(muon_counts, on=["__ntuple", "entry", "rec.slc..index"], how="left")
    this_df = this_df.merge(proton_counts, on=["__ntuple", "entry", "rec.slc..index"], how="left")
    this_df = this_df.merge(pion_counts, on=["__ntuple", "entry", "rec.slc..index"], how="left")    

    this_df["n_mu"] = this_df["n_mu"].fillna(0).astype(int)
    this_df["n_proton"] = this_df["n_proton"].fillna(0).astype(int)
    this_df["n_pion"] = this_df["n_pion"].fillna(0).astype(int)    

    this_df.set_index(["__ntuple", "entry", "rec.slc..index", "rec.slc.reco.pfp..index"], inplace=True)

    df[('muon_counter', '', '', '', '', '')] = this_df.n_mu
    df[('proton_counter', '', '', '', '', '')] = this_df.n_proton
    df[('pion_counter', '', '', '', '', '')] = this_df.n_pion    

    return df

In [49]:
def pass_slc_with_n_pfps(df, n = 3):
    
    group_levels = ['__ntuple', 'entry', 'rec.slc..index']
    
    # Count how many pfps per slice
    pfp_counts = df.groupby(level=group_levels).size()

    # Get only slices with exactly 3 pfps
    valid_slices = pfp_counts[pfp_counts == n].index

    # Apply the mask to original DataFrame
    df = df.loc[df.index.droplevel('rec.slc.reco.pfp..index').isin(valid_slices)]
    
    df = df[ (df["muon_counter"] == 1) & (df["proton_counter"] == 2) & (df["pion_counter"] == 0)]
    
    return df

In [50]:
mc_reco_df = mc_reco_df_
mc_nudf = mc_nudf_
mc_intime_reco_df = mc_intime_reco_df_
data_reco_df = data_reco_df_

In [51]:
#mc_nudf.nmu_27MeV.value_counts()
#mc_nudf.np_50MeV.value_counts()
#mc_nudf.npi.value_counts()
#mc_nudf.npi0.value_counts()
mc_nudf.pdg.value_counts()

pdg
 14    80731
-14     1066
 12      817
-12       31
Name: count, dtype: int64

In [52]:
is_fv = cc2preco.InFV(mc_nudf.position)
is_signal = cc2preco.Signal(mc_nudf)
is_cc1p0pi = cc2preco.cc1p0pi(mc_nudf)
is_cc2pNpi = cc2preco.cc2pNpi(mc_nudf)
is_cc1pMpi = cc2preco.cc1pMpi(mc_nudf)
is_cc1p1pi = cc2preco.cc1p1pi(mc_nudf)
is_out_range = cc2preco.out_range(mc_nudf)
is_bkg_other = cc2preco.bkg_other(mc_nudf)
is_cc = mc_nudf.iscc
genie_mode = mc_nudf.genie_mode
w = mc_nudf.w

try :
    nuint_categ = pd.Series(8, index=mc_nudf.index)
    topo_categ = pd.Series(8, index=mc_nudf.index)    

except Exception as e:
    print(f"Error init nuint_categ")
    print(f"Error init topo_categ")    

nuint_categ[~is_fv] = -1  # Out of FV
nuint_categ[is_fv & ~is_cc] = 0  # NC
nuint_categ[is_fv & is_cc & (genie_mode == 0)] = 1  # CCQE
nuint_categ[is_fv & is_cc & (genie_mode == 10)] = 2  # CCEMC
nuint_categ[is_fv & is_cc & (genie_mode == 1)] = 3  # CCRES
nuint_categ[is_fv & is_cc & (genie_mode == 2)] = 4  # CCDIS
nuint_categ[is_fv & is_cc & ~( (genie_mode == 0) | (genie_mode == 10) | (genie_mode == 1) | (genie_mode == 2) )] = 5  # CCCOH
mc_nudf['nuint_categ'] = nuint_categ

topo_categ[~is_fv] = -1  # Out of FV
topo_categ[is_fv & is_cc & is_signal] = 0  # cc2p0pi signal
topo_categ[is_fv & is_cc & is_cc1p0pi] = 1  # cc1p0pi bkg
topo_categ[is_fv & is_cc & is_cc1p1pi] = 2  # cc1p1pi bkg
topo_categ[is_fv & is_cc & is_cc2pNpi] = 3  # cc2pNpi bkg
topo_categ[is_fv & is_cc & is_cc1pMpi] = 4  # cc1pMpi bkg
#topo_categ[is_fv & is_cc & is_out_range] = 5  # outside momentum range  
topo_categ[is_fv & is_cc & is_bkg_other] = 5  # other cc bkgs   
topo_categ[is_fv & ~is_cc] = 6  # NC        
mc_nudf['topo_categ'] = topo_categ



In [53]:
mc_reco_df = add_n_slice_col(mc_reco_df)
mc_intime_reco_df = add_n_slice_col(mc_intime_reco_df)
data_reco_df = add_n_slice_col(data_reco_df)

In [54]:
mc_pid_result_series = mc_reco_df.apply(cc2preco.get_pid_result, axis=1)
mc_reco_df[('pfp', 'trk', 'reco_pid', '', '', '')] = mc_pid_result_series

data_pid_result_series = data_reco_df.apply(cc2preco.get_pid_result, axis=1)
data_reco_df[('pfp', 'trk', 'reco_pid', '', '', '')] = data_pid_result_series

mc_intime_pid_result_series = mc_intime_reco_df.apply(cc2preco.get_pid_result, axis=1)
mc_intime_reco_df[('pfp', 'trk', 'reco_pid', '', '', '')] = mc_intime_pid_result_series

In [55]:
mc_reco_df = get_n_recopid_per_slc(mc_reco_df)
data_reco_df = get_n_recopid_per_slc(data_reco_df)
mc_intime_reco_df = get_n_recopid_per_slc(mc_intime_reco_df)

In [56]:
# truth-matching for mc bnb cosmic
mc_nudf.columns = pd.MultiIndex.from_tuples([('gen',) + col if isinstance(col, tuple) else ('gen', col) for col in mc_nudf.columns])
mc_nudf.columns = pd.MultiIndex.from_tuples([
    col + ('',) * (6 - len(col)) for col in mc_nudf.columns
])
mc_reco_df = ph.multicol_merge(mc_reco_df.reset_index(), mc_nudf.reset_index(),
                            left_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('slc','tmatch', 'idx', '', '', '')],
                            right_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('rec.mc.nu..index', '','', '', '', '')], 
                            how="left") ## -- save all sllices
mc_reco_df = mc_reco_df.set_index(["__ntuple", "entry", "rec.slc..index", "rec.slc.reco.pfp..index"], verify_integrity=True)

mc_reco_df.loc[mc_reco_df[('gen', 'nuint_categ', '', '', '', '')].isna(), [('gen', 'nuint_categ', '', '', '', '')]] = -2
mc_reco_df.loc[mc_reco_df[('gen', 'topo_categ', '', '', '', '')].isna(), [('gen', 'topo_categ', '', '', '', '')]] = -2


In [57]:
mc_intime_reco_df[('gen', 'nuint_categ', '', '', '', '')] = -3
mc_intime_reco_df[('gen', 'topo_categ', '', '', '', '')] = -3

In [58]:
# selected true neutrino events topological breakdown
topo_mode_list = [0, 1, 2, 3, 4, 5, 6, -1, -2, -3]
topo_mode_labels = [r"$\mathrm{CC2p0}\pi$",r"$\mathrm{CC1p0}\pi$",r"$\mathrm{CC1p1}\pi$",r"$\mathrm{CC2pN}\pi$",r"$\mathrm{CC1pM}\pi$", "other bkgs", "NC","non-FV", "mc cosmics", "intime cosmics"]

topo_colors = ['#d62728',  # Red   
           '#2ca02c',  # Green  
           '#9467bd',  # Purple 
           '#17becf',  # Teal                     
           "#11e211",  # light green
           '#1f77b4',  # Blue
           #"#4e3548",  # black-ish           
           '#ff7f0e',  # Orange
           '#e377c2',  # Pink
           '#7f7f7f',  # Gray
           "#bdbd22"]  # Yellow-green

# selected true neutrino events interaction breakdown

mode_list = [0, 1, 2, 3, 4, 5, -1, -2, -3]
mode_labels = ["NC", "CCQE", "CCMEC", "CCRES", "CCDIS", "CCCOH", "non-FV", "mc cosmics", "intime cosmics"]          
          
colors = ['#ff7f0e',  # Orange
           '#2ca02c',  # Green
           '#d62728',  # Red
           '#9467bd',  # Purple
           '#8c564b',  # Brown
           '#17becf',  # Teal                    
           '#e377c2',  # Pink
           '#7f7f7f',  # Gray
           '#bcbd22']  # Yellow-green

In [59]:
def draw_reco_stacked_hist(var_mc_bnb_cosmic, var_mc_intime_cosmic, is_logx, is_logy,
                           title_x, title_y, x_min, x_max, nbins, outname,
                           data_overlay=False, var_data=[], draw_density=False):
    
    ## Define the output figure to have two pads
    fig = plt.figure(figsize=(8.5, 8), dpi=100)
    gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1], hspace=0.10)
    ax_main = fig.add_subplot(gs[0])
    ax_ratio = fig.add_subplot(gs[1], sharex=ax_main)
    
    if is_logx:
        ax_main.set_xscale('log')
        ax_ratio.set_xscale('log')        
    if is_logy:
        ax_main.set_yscale('log')


    ax_main.set_xlabel("")  # Only bottom has x-label
    ax_main.set_ylabel(title_y)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize = 20)

    ax_ratio.axhline(1.0, color='red', linestyle='--', linewidth=1)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize=12)
    ax_ratio.set_ylim(0.4, 1.6)
    ax_ratio.tick_params(width=2, length=6)
    for spine in ax_ratio.spines.values():
        spine.set_linewidth(2)

    plt.setp(ax_main.get_xticklabels(), visible=False)

    ## Define scaling parameters
    POT_scale = 1.
    Gate_scale = 1.
    if not draw_density:
        POT_scale = mc_pot_scale
        Gate_scale = mc_gate_scale

    ## Define binning
    if is_logx:
        bins = np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1)
        bin_centers = np.sqrt(bins[:-1] * bins[1:])
    else:
        bins = np.linspace(x_min, x_max, nbins + 1)
        bin_centers = 0.5 * (bins[:-1] + bins[1:])

    ## Define data for MC
    all_mc_data = var_mc_bnb_cosmic + var_mc_intime_cosmic
    all_weights = (
        [np.ones_like(data) * POT_scale for data in var_mc_bnb_cosmic] +
        [np.ones_like(data) * Gate_scale for data in var_mc_intime_cosmic]
    )
    each_mc_hist_data = [np.histogram(data, bins=bins, weights=w)[0] for data, w in zip(all_mc_data, all_weights)]
    total_mc = np.sum(each_mc_hist_data, axis=0)

    ## Plot stacked MC
    hist_data, bins, _ = ax_main.hist(all_mc_data,
                                      bins=bins,
                                      weights=all_weights,
                                      stacked=True,
                                      color=colors,
                                      label=mode_labels,
                                      edgecolor='none',
                                      linewidth=0,
                                      density=draw_density,
                                      histtype='stepfilled')

    max_y = np.max(total_mc)

    ## Plot MC stat error box
    each_mc_hist_data = []
    each_mc_hist_err2 = []  # sum of squared weights for error

    for data, w in zip(all_mc_data, all_weights):
        hist_vals, _ = np.histogram(data, bins=bins, weights=w)
        hist_err2, _ = np.histogram(data, bins=bins, weights=np.square(w))
        each_mc_hist_data.append(hist_vals)
        each_mc_hist_err2.append(hist_err2)

    total_mc = np.sum(each_mc_hist_data, axis=0)
    total_mc_err2 = np.sum(each_mc_hist_err2, axis=0)
    mc_stat_err = np.sqrt(total_mc_err2)
    #mc_stat_err = np.sqrt(total_mc)

    ax_main.bar(
       bin_centers,
        2 * mc_stat_err,
        width=np.diff(bins),
        bottom=total_mc - mc_stat_err,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ax_main.tick_params(width=2, length=10)
    for spine in ax_main.spines.values():
        spine.set_linewidth(2)

    ## Draw Ratio error bar
    mc_stat_err_ratio = mc_stat_err / total_mc
    mc_content_ratio = total_mc / total_mc
    mc_stat_err_ratio = np.nan_to_num(mc_stat_err_ratio, nan=0.)
    mc_content_ratio = np.nan_to_num(mc_content_ratio, nan=-999.)
    ax_ratio.bar(
        bin_centers,
        2*mc_stat_err_ratio,
        width=np.diff(bins),
        bottom=mc_content_ratio - mc_stat_err_ratio,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ## Draw data too
    if data_overlay:
        ax_main.set_ylabel("Events (POT = %.2e)" % target_pot)
        if draw_density:
            ax_main.set_ylabel("A.U.")

        ## Define data histogram
        counts, _ = np.histogram(var_data, bins=bins)

        bin_widths = np.diff(bins)
        total_data = np.sum(counts)
        norm_counts = counts
        data_eylow, data_eyhigh = sh.return_data_stat_err(counts)

        if draw_density:
            norm_counts = counts / (total_data * bin_widths)
            data_eylow = data_eylow / (total_data * bin_widths) if total_data > 0 else np.zeros_like(counts)
            data_eyhigh = data_eyhigh / (total_data * bin_widths) if total_data > 0 else np.zeros_like(counts)

        errors = data_eylow + data_eyhigh
        
        ## Plot data points on main histogram
        #ax_main.errorbar(bin_centers, norm_counts, yerr=errors,
        #                 fmt='o', color='black', label='Data',
        #                 markersize=5, capsize=3, linewidth=1.5)
        
        ax_main.errorbar(bin_centers, norm_counts,
                 yerr=np.vstack((data_eylow, data_eyhigh)),
                 fmt='o', color='black', label='Data',
                 markersize=5, capsize=3, linewidth=1.5)
        
        max_y_data = np.max(norm_counts + data_eyhigh)
        #print("max_y: %f" %(max_y))
        #print("max_y_data: %f" %(max_y_data))
        max_y = max(max_y, max_y_data)
        #print("max_y: %f" %(max_y))

        ## Make data/mc ratio plot
        data_ratio = norm_counts / total_mc
        data_ratio_eylow = data_eylow / total_mc
        data_ratio_eyhigh = data_eyhigh / total_mc
        data_ratio = np.nan_to_num(data_ratio, nan=-999.)
        data_ratio_eylow = np.nan_to_num(data_ratio_eylow, nan=0.)
        data_ratio_eyhigh = np.nan_to_num(data_ratio_eyhigh, nan=0.)
        
        #data_ratio_errors = data_ratio_eylow + data_ratio_eyhigh
        #ax_ratio.errorbar(bin_centers, data_ratio, yerr=data_ratio_errors,
        #                 fmt='o', color='black', label='Data',
        #                 markersize=5, capsize=3, linewidth=1.5)

        ax_ratio.errorbar(bin_centers, data_ratio,
                  yerr=np.vstack((data_ratio_eylow, data_ratio_eyhigh)),
                  fmt='o', color='black', label='Data',
                  markersize=5, capsize=3, linewidth=1.5)

    ## Set ax_main axes variables
    ax_main.set_xlim(x_min, x_max)
    ax_main.set_ylim(0.01, max_y * 1.5)
    if is_logy:
        ax_main.set_ylim(0.1, max_y * 600)
    
    # Legend with fractions
    accum_sum = [np.sum(data) for data in hist_data]
    accum_sum = [0.] + accum_sum
    total_sum = accum_sum[-1]
    individual_sums = [accum_sum[i + 1] - accum_sum[i] for i in range(len(accum_sum) - 1)]
    fractions = [(count / total_sum) * 100 for count in individual_sums]
    legend_labels = [f"{label} [{counts:.1f}] ({frac:.1f}%)" for counts, label, frac in zip(individual_sums[::-1],mode_labels[::-1], fractions[::-1])]
    
    if data_overlay:
        if draw_density:
            legend_labels.append("BNB data")
        else:
            legend_labels.append(f"total mc [{total_sum:.0f}]")
            legend_labels.append(f"BNB data [{total_data:.0f}]")
            
    ax_main.legend(legend_labels, loc='upper left', fontsize=10, frameon=False, ncol=3, bbox_to_anchor=(0.02, 0.98), columnspacing = 0.7)

    legend_labels_ratio = ["y=1", "MC (Stat. Only)", "Data/MC"]
    ax_ratio.legend(legend_labels_ratio, loc='upper left', fontsize=7, frameon=False, ncol=3, bbox_to_anchor=(0.02, 0.98))

    ax_main.text(0.00, 1.02, "SBND " + sample_str,
                 transform=ax_main.transAxes, fontsize=14, fontweight='bold')

    fig.savefig(f"/exp/sbnd/data/users/{user}/{code_version}/plots/2025B/{outname}.pdf", format='pdf', bbox_inches=None)    
    
    if show_plots:
        plt.show()
    else: 
        plt.close('all')

In [60]:
def topo_draw_reco_stacked_hist(var_mc_bnb_cosmic, var_mc_intime_cosmic, is_logx, is_logy,
                           title_x, title_y, x_min, x_max, nbins, outname,
                           data_overlay=False, var_data=[], draw_density=False):
    
    ## Define the output figure to have two pads
    fig = plt.figure(figsize=(8.5, 8), dpi=100)
    gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1], hspace=0.10)
    ax_main = fig.add_subplot(gs[0])
    ax_ratio = fig.add_subplot(gs[1], sharex=ax_main)
    
    if is_logx:
        ax_main.set_xscale('log')
        ax_ratio.set_xscale('log')        
    if is_logy:
        ax_main.set_yscale('log')


    ax_main.set_xlabel("")  # Only bottom has x-label
    ax_main.set_ylabel(title_y)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize = 20)

    ax_ratio.axhline(1.0, color='red', linestyle='--', linewidth=1)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize=12)
    ax_ratio.set_ylim(0.4, 1.6)
    ax_ratio.tick_params(width=2, length=6)
    for spine in ax_ratio.spines.values():
        spine.set_linewidth(2)

    plt.setp(ax_main.get_xticklabels(), visible=False)

    ## Define scaling parameters
    POT_scale = 1.
    Gate_scale = 1.
    if not draw_density:
        POT_scale = mc_pot_scale
        Gate_scale = mc_gate_scale

    ## Define binning
    if is_logx:
        bins = np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1)
        bin_centers = np.sqrt(bins[:-1] * bins[1:])
    else:
        bins = np.linspace(x_min, x_max, nbins + 1)
        bin_centers = 0.5 * (bins[:-1] + bins[1:])

    ## Define data for MC
    all_mc_data = var_mc_bnb_cosmic + var_mc_intime_cosmic
    all_weights = (
        [np.ones_like(data) * POT_scale for data in var_mc_bnb_cosmic] +
        [np.ones_like(data) * Gate_scale for data in var_mc_intime_cosmic]
    )
    each_mc_hist_data = [np.histogram(data, bins=bins, weights=w)[0] for data, w in zip(all_mc_data, all_weights)]
    total_mc = np.sum(each_mc_hist_data, axis=0)

    ## Plot stacked MC
    hist_data, bins, _ = ax_main.hist(all_mc_data,
                                      bins=bins,
                                      weights=all_weights,
                                      stacked=True,
                                      color=topo_colors,
                                      label=topo_mode_labels,
                                      edgecolor='none',
                                      linewidth=0,
                                      density=draw_density,
                                      histtype='stepfilled')

    max_y = np.max(total_mc)

    ## Plot MC stat error box
    each_mc_hist_data = []
    each_mc_hist_err2 = []  # sum of squared weights for error

    for data, w in zip(all_mc_data, all_weights):
        hist_vals, _ = np.histogram(data, bins=bins, weights=w)
        hist_err2, _ = np.histogram(data, bins=bins, weights=np.square(w))
        each_mc_hist_data.append(hist_vals)
        each_mc_hist_err2.append(hist_err2)

    total_mc = np.sum(each_mc_hist_data, axis=0)
    total_mc_err2 = np.sum(each_mc_hist_err2, axis=0)
    mc_stat_err = np.sqrt(total_mc_err2)
    #mc_stat_err = np.sqrt(total_mc)

    ax_main.bar(
       bin_centers,
        2 * mc_stat_err,
        width=np.diff(bins),
        bottom=total_mc - mc_stat_err,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ax_main.tick_params(width=2, length=10)
    for spine in ax_main.spines.values():
        spine.set_linewidth(2)

    ## Draw Ratio error bar
    mc_stat_err_ratio = mc_stat_err / total_mc
    mc_content_ratio = total_mc / total_mc
    mc_stat_err_ratio = np.nan_to_num(mc_stat_err_ratio, nan=0.)
    mc_content_ratio = np.nan_to_num(mc_content_ratio, nan=-999.)
    ax_ratio.bar(
        bin_centers,
        2*mc_stat_err_ratio,
        width=np.diff(bins),
        bottom=mc_content_ratio - mc_stat_err_ratio,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ## Draw data too
    if data_overlay:
        ax_main.set_ylabel("Events (POT = %.2e)" % target_pot)
        if draw_density:
            ax_main.set_ylabel("A.U.")

        ## Define data histogram
        counts, _ = np.histogram(var_data, bins=bins)

        bin_widths = np.diff(bins)
        total_data = np.sum(counts)
        norm_counts = counts
        data_eylow, data_eyhigh = sh.return_data_stat_err(counts)

        if draw_density:
            norm_counts = counts / (total_data * bin_widths)
            data_eylow = data_eylow / (total_data * bin_widths) if total_data > 0 else np.zeros_like(counts)
            data_eyhigh = data_eyhigh / (total_data * bin_widths) if total_data > 0 else np.zeros_like(counts)

        errors = data_eylow + data_eyhigh
        
        ## Plot data points on main histogram
        #ax_main.errorbar(bin_centers, norm_counts, yerr=errors,
        #                 fmt='o', color='black', label='Data',
        #                 markersize=5, capsize=3, linewidth=1.5)
        
        ax_main.errorbar(bin_centers, norm_counts,
                 yerr=np.vstack((data_eylow, data_eyhigh)),
                 fmt='o', color='black', label='Data',
                 markersize=5, capsize=3, linewidth=1.5)
        
        max_y_data = np.max(norm_counts + data_eyhigh)
        #print("max_y: %f" %(max_y))
        #print("max_y_data: %f" %(max_y_data))
        max_y = max(max_y, max_y_data)
        #print("max_y: %f" %(max_y))

        ## Make data/mc ratio plot
        data_ratio = norm_counts / total_mc
        data_ratio_eylow = data_eylow / total_mc
        data_ratio_eyhigh = data_eyhigh / total_mc
        data_ratio = np.nan_to_num(data_ratio, nan=-999.)
        data_ratio_eylow = np.nan_to_num(data_ratio_eylow, nan=0.)
        data_ratio_eyhigh = np.nan_to_num(data_ratio_eyhigh, nan=0.)
        
        #data_ratio_errors = data_ratio_eylow + data_ratio_eyhigh
        #ax_ratio.errorbar(bin_centers, data_ratio, yerr=data_ratio_errors,
        #                 fmt='o', color='black', label='Data',
        #                 markersize=5, capsize=3, linewidth=1.5)

        ax_ratio.errorbar(bin_centers, data_ratio,
                  yerr=np.vstack((data_ratio_eylow, data_ratio_eyhigh)),
                  fmt='o', color='black', label='Data',
                  markersize=5, capsize=3, linewidth=1.5)

    ## Set ax_main axes variables
    ax_main.set_xlim(x_min, x_max)
    ax_main.set_ylim(0.01, max_y * 1.5)
    if is_logy:
        ax_main.set_ylim(0.1, max_y * 600)
    
    # Legend with fractions
    accum_sum = [np.sum(data) for data in hist_data]
    accum_sum = [0.] + accum_sum
    total_sum = accum_sum[-1]
    individual_sums = [accum_sum[i + 1] - accum_sum[i] for i in range(len(accum_sum) - 1)]
    fractions = [(count / total_sum) * 100 for count in individual_sums]
    legend_labels = [f"{label} [{counts:.1f}] ({frac:.1f}%)" for counts, label, frac in zip(individual_sums[::-1], topo_mode_labels[::-1], fractions[::-1])]
    
    if data_overlay:
        if draw_density:
            legend_labels.append("BNB data")
        else:
            legend_labels.append(f"total mc [{total_sum:.0f}]")
            legend_labels.append(f"BNB data [{total_data:.0f}]")
            
    ax_main.legend(legend_labels, loc='upper left', fontsize=10, frameon=False, ncol=3, bbox_to_anchor=(0.02, 0.98), columnspacing = 0.7)

    legend_labels_ratio = ["y=1", "MC (Stat. Only)", "Data/MC"]
    ax_ratio.legend(legend_labels_ratio, loc='upper left', fontsize=7, frameon=False, ncol=3, bbox_to_anchor=(0.02, 0.98))

    ax_main.text(0.00, 1.02, "SBND " + sample_str,
                 transform=ax_main.transAxes, fontsize=14, fontweight='bold')

    fig.savefig(f"/exp/sbnd/data/users/{user}/{code_version}/plots/2025B/topo_{outname}.pdf", format='pdf', bbox_inches=None)     
    
    if show_plots:
        plt.show()
    else: 
        plt.close('all')

In [61]:
def draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, column, x_title, y_title, x_min, x_max, n_bins, out_name, is_logx = False, is_logy = False):
 
    nuint_categ_col = ('gen', 'nuint_categ', '', '', '', '')
 
    if "entry" in out_name:
        mc_bnb_cosmic_df_per_slc = mc_bnb_cosmic_df.groupby([('__ntuple'), ('entry')])[[column, nuint_categ_col]].first()
        mc_bnb_intime_df_per_slc = mc_bnb_intime_df.groupby([('__ntuple'), ('entry')])[[column, nuint_categ_col]].first()

        data_df_per_slc = data_df.groupby([('__ntuple'), ('entry')])[[column]].first()

    else:
        mc_bnb_cosmic_df_per_slc = mc_bnb_cosmic_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column, nuint_categ_col]].first()
        mc_bnb_intime_df_per_slc = mc_bnb_intime_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column, nuint_categ_col]].first()

        data_df_per_slc = data_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column]].first()

    mode_list_bnb_cosmic = [m for m in mode_list if m != -3]
    var_mc_bnb_cosmic = [mc_bnb_cosmic_df_per_slc[mc_bnb_cosmic_df_per_slc[nuint_categ_col] == mode][column]for mode in mode_list_bnb_cosmic]
    var_mc_bnb_intime = [mc_bnb_intime_df_per_slc[mc_bnb_intime_df_per_slc.gen.nuint_categ == -3][column]]
    var_data = data_df_per_slc[column]
    
    draw_reco_stacked_hist(var_mc_bnb_cosmic, var_mc_bnb_intime, is_logx, is_logy, x_title, y_title, x_min, x_max, n_bins, out_name, True, var_data)

In [62]:
def topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, column, x_title, y_title, x_min, x_max, n_bins, out_name, is_logx = False, is_logy = False):
     
    topo_categ_col = ('gen', 'topo_categ', '', '', '', '')
 
    if "entry" in out_name:
        mc_bnb_cosmic_df_per_slc = mc_bnb_cosmic_df.groupby([('__ntuple'), ('entry')])[[column, topo_categ_col]].first()
        mc_bnb_intime_df_per_slc = mc_bnb_intime_df.groupby([('__ntuple'), ('entry')])[[column, topo_categ_col]].first()

        data_df_per_slc = data_df.groupby([('__ntuple'), ('entry')])[[column]].first()

    else:
        mc_bnb_cosmic_df_per_slc = mc_bnb_cosmic_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column, topo_categ_col]].first()
        mc_bnb_intime_df_per_slc = mc_bnb_intime_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column, topo_categ_col]].first()

        data_df_per_slc = data_df.groupby([('__ntuple'), ('entry'), ('rec.slc..index')])[[column]].first()

    mode_list_bnb_cosmic = [m for m in topo_mode_list if m != -3]
    var_mc_bnb_cosmic = [mc_bnb_cosmic_df_per_slc[mc_bnb_cosmic_df_per_slc[topo_categ_col] == mode][column]for mode in mode_list_bnb_cosmic]
    var_mc_bnb_intime = [mc_bnb_intime_df_per_slc[mc_bnb_intime_df_per_slc.gen.topo_categ == -3][column]]
    var_data = data_df_per_slc[column]
    
    topo_draw_reco_stacked_hist(var_mc_bnb_cosmic, var_mc_bnb_intime, is_logx, is_logy, x_title, y_title, x_min, x_max, n_bins, out_name, True, var_data)

In [63]:
def draw_reco_valid_plots(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, suffix, is_logx = False, is_logy = False):
    ## draw 1) clear cosmic, 2) nu score, 3) vertex x,y and z
    
    ## -- 0) N(slc) per entry
    n_slc_col = ('slc', 'n_slc_per_entry', '', '', '', '')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, n_slc_col, "N(slice)", "A.U.", -0.5, 25.5, 26, suffix + "_entry_n_slc", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, n_slc_col, "N(slice)", "A.U.", -0.5, 25.5, 26, suffix + "_entry_n_slc", is_logx, is_logy)    

    ## -- 1) Clear cosmic
    clear_cosmic_col = ('slc', 'is_clear_cosmic', '', '', '', '')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, clear_cosmic_col, "Is Clear Cosmic", "A.U.", -0.5, 1.5, 2, suffix + "_slc_is_clear_cosmic", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, clear_cosmic_col, "Is Clear Cosmic", "A.U.", -0.5, 1.5, 2, suffix + "_slc_is_clear_cosmic", is_logx, is_logy)    

    ## -- 2) vertex x,y,z
    vtx_x_col = ('slc', 'vertex', 'x', '', '', '')
    vtx_y_col = ('slc', 'vertex', 'y', '', '', '')
    vtx_z_col = ('slc', 'vertex', 'z', '', '', '')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_x_col, "Slice Vertex X [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_x", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_x_col, "Slice Vertex X [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_x", is_logx, is_logy)    
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_y_col, "Slice Vertex Y [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_y", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_y_col, "Slice Vertex Y [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_y", is_logx, is_logy)    
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_z_col, "Slice Vertex Z [cm]", "A.U.", -100, 600, 60, suffix + "_slc_vtx_z", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_bnb_intime_df, data_df, vtx_z_col, "Slice Vertex Z [cm]", "A.U.", -100, 600, 60, suffix + "_slc_vtx_z", is_logx, is_logy)    

    # mc_bnb_cosmic_df_east = mc_bnb_cosmic_df[mc_bnb_cosmic_df.slc.vertex.x < 0.]
    # mc_bnb_intime_df_east = mc_bnb_intime_df[mc_bnb_intime_df.slc.vertex.x < 0.]
    # data_df_east = data_df[data_df.slc.vertex.x < 0.]
    
    # mc_bnb_cosmic_df_west = mc_bnb_cosmic_df[mc_bnb_cosmic_df.slc.vertex.x > 0.]
    # mc_bnb_intime_df_west = mc_bnb_intime_df[mc_bnb_intime_df.slc.vertex.x > 0.]
    # data_df_west = data_df[data_df.slc.vertex.x > 0.]

    # draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df_east, mc_bnb_intime_df_east, data_df_east, vtx_y_col, "East TPC Slice Vertex Y [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_y_east", is_logx, is_logy)
    # draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df_west, mc_bnb_intime_df_west, data_df_west, vtx_y_col, "West TPC Slice Vertex Y [cm]", "A.U.", -300, 300, 60, suffix + "_slc_vtx_y_west", is_logx, is_logy)
    
    # draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df_east, mc_bnb_intime_df_east, data_df_east, vtx_z_col, "East TPC Slice Vertex Z [cm]", "A.U.", -100, 600, 70, suffix + "_slc_vtx_z_east", is_logx, is_logy)
    # draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df_west, mc_bnb_intime_df_west, data_df_west, vtx_z_col, "West TPC Slice Vertex Z [cm]", "A.U.", -100, 600, 70, suffix + "_slc_vtx_z_west", is_logx, is_logy)
       
    # dfs for muons and protons
    mc_bnb_cosmic_muons = mc_bnb_cosmic_df[mc_bnb_cosmic_df.pfp.trk.reco_pid == 13]
    mc_bnb_intime_muons = mc_bnb_intime_df[mc_bnb_intime_df.pfp.trk.reco_pid == 13]
    data_muons = data_df[data_df.pfp.trk.reco_pid == 13]
    
    mc_bnb_cosmic_protons = mc_bnb_cosmic_df[mc_bnb_cosmic_df.pfp.trk.reco_pid == 2212]
    mc_bnb_intime_protons = mc_bnb_intime_df[mc_bnb_intime_df.pfp.trk.reco_pid == 2212]
    data_protons = data_df[data_df.pfp.trk.reco_pid == 2212]   
           
    # leading and recoil protons   
    mc_bnb_cosmic_lead_prot_df = mc_bnb_cosmic_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(0)
    mc_bnb_cosmic_rec_prot_df = mc_bnb_cosmic_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(1)
    
    mc_bnb_intime_lead_prot_df = mc_bnb_intime_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(0)
    mc_bnb_intime_rec_prot_df = mc_bnb_intime_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(1)           
 
    data_lead_prot_df = data_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(0)
    data_rec_prot_df = data_protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(1)           
            
    ## -- 3) muon momentum
    muon_mom_col = ('pfp', 'trk', 'rangeP', 'p_muon','','')

    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_mom_col, "$p_{\mu}$ [GeV/c]", "A.U.", 0.05, 1.2, 15, suffix + "_muon_mom", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_mom_col, "$p_{\mu}$ [GeV/c]", "A.U.", 0.05, 1.2, 15, suffix + "_muon_mom", is_logx, is_logy)    
    
     ## -- 4) proton momentum
    proton_mom_col = ('pfp', 'trk', 'rangeP', 'p_proton','','')
    
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    proton_mom_col, "leading $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, suffix + "_lead_proton_mom", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df,
                                    proton_mom_col, "leading $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, suffix + "_lead_proton_mom", is_logx, is_logy)    
    
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    proton_mom_col, "recoil $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, suffix + "_recoil_proton_mom", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    proton_mom_col, "recoil $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, suffix + "_recoil_proton_mom", is_logx, is_logy)          
     
    ## -- 5) muon costheta
    muon_costheta_col = ('pfp', 'trk', 'dir','z','','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_costheta_col, "$cos\\theta_{\mu}$", "A.U.", -1., 1., 15, suffix + "_muon_costheta", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_costheta_col, "$cos\\theta_{\mu}$", "A.U.", -1., 1., 15, suffix + "_muon_costheta", is_logx, is_logy)    
    
    ## -- 6) proton costheta
    
    proton_costheta_col = ('pfp', 'trk', 'dir','z','','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    proton_costheta_col, "leading $cos\\theta_{p}$", "A.U.", -1., 1., 15, suffix + "_leading_proton_costheta", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    proton_costheta_col, "leading $cos\\theta_{p}$", "A.U.", -1., 1., 15, suffix + "_leading_proton_costheta", is_logx, is_logy)          
    
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df,  
                                    proton_costheta_col, "recoil $cos\\theta_{p}$", "A.U.", -1., 1., 15, suffix + "_recoil_proton_costheta", is_logx, is_logy)
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df,  
                                    proton_costheta_col, "recoil $cos\\theta_{p}$", "A.U.", -1., 1., 15, suffix + "_recoil_proton_costheta", is_logx, is_logy)     
    
    
    ## -- 7) muon candidate chi2_muon  
    
    muon_cand_chi2_mu_col = ('pfp', 'trk', 'chi2pid','I2','chi2_muon','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_cand_chi2_mu_col, "muon candidate $\chi^{2}_{\mu}$", "A.U.", 0., 30, 20, suffix + "_muon_cand_chi2_mu", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_cand_chi2_mu_col, "muon candidate $\chi^{2}_{\mu}$", "A.U.", 0., 30, 20, suffix + "_muon_cand_chi2_mu", is_logx, is_logy)   
    
    ## -- 8) muon candidate chi2_p
    
    muon_cand_chi2_p_col = ('pfp', 'trk', 'chi2pid','I2','chi2_proton','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_cand_chi2_p_col, "muon candidate $\chi^{2}_{p}$", "A.U.", 70., 350, 20, suffix + "_muon_cand_chi2_proton", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_muons,
                                    mc_bnb_intime_muons,
                                    data_muons, 
                                    muon_cand_chi2_p_col, "muon candidate $\chi^{2}_{p}$", "A.U.", 70., 350, 20, suffix + "_muon_cand_chi2_proton", is_logx, is_logy)              
             
    ## -- 9) leading proton candidate chi2_muon  
    
    lead_prot_cand_chi2_mu_col = ('pfp', 'trk', 'chi2pid','I2','chi2_muon','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    lead_prot_cand_chi2_mu_col, "leading proton candidate $\chi^{2}_{\mu}$", "A.U.", 0., 60, 20, suffix + "_lead_prot_cand_chi2_mu", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    lead_prot_cand_chi2_mu_col, "leading proton candidate $\chi^{2}_{\mu}$", "A.U.", 0., 60, 20, suffix + "_lead_prot_cand_chi2_mu", is_logx, is_logy)   
    
    ## -- 10) leading proton candidate chi2_p
    
    lead_prot_cand_chi2_p_col = ('pfp', 'trk', 'chi2pid','I2','chi2_proton','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    lead_prot_cand_chi2_p_col, "leading proton candidate $\chi^{2}_{p}$", "A.U.", 0., 100, 20, suffix + "_lead_prot_cand_chi2_proton", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_lead_prot_df,
                                    mc_bnb_intime_lead_prot_df,
                                    data_lead_prot_df, 
                                    lead_prot_cand_chi2_p_col, "leading proton candidate $\chi^{2}_{p}$", "A.U.", 0., 100, 20, suffix + "_lead_prot_cand_chi2_proton", is_logx, is_logy)              
                 
   
    ## -- 11) recoil proton candidate chi2_muon  
    
    rec_prot_cand_chi2_mu_col = ('pfp', 'trk', 'chi2pid','I2','chi2_muon','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    rec_prot_cand_chi2_mu_col, "recoil proton candidate $\chi^{2}_{\mu}$", "A.U.", 0., 100, 20, suffix + "_rec_prot_cand_chi2_mu", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    rec_prot_cand_chi2_mu_col, "recoil proton candidate $\chi^{2}_{\mu}$", "A.U.", 0., 100, 20, suffix + "_rec_prot_cand_chi2_mu", is_logx, is_logy)   
    
    ## -- 12) recoil proton candidate chi2_p
    
    rec_prot_cand_chi2_p_col = ('pfp', 'trk', 'chi2pid','I2','chi2_proton','')
    draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    rec_prot_cand_chi2_p_col, "recoil proton candidate $\chi^{2}_{p}$", "A.U.", 0., 100, 20, suffix + "_rec_prot_cand_chi2_proton", is_logx, is_logy)
    
    topo_draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_rec_prot_df,
                                    mc_bnb_intime_rec_prot_df,
                                    data_rec_prot_df, 
                                    rec_prot_cand_chi2_p_col, "recoil proton candidate $\chi^{2}_{p}$", "A.U.", 0., 100, 20, suffix + "_rec_prot_cand_chi2_proton", is_logx, is_logy)              
                 
         

In [64]:
## 1) Vertex FV
mc_reco_df = mc_reco_df[cc2preco.InFV(mc_reco_df.slc.vertex)]
mc_intime_reco_df = mc_intime_reco_df[cc2preco.InFV(mc_intime_reco_df.slc.vertex)]
data_reco_df = data_reco_df[cc2preco.InFV(data_reco_df.slc.vertex)]

draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "vtx_fv")

In [65]:
## 2) Not clear cosmic
mc_reco_df = mc_reco_df[mc_reco_df.slc.is_clear_cosmic == 0]
mc_intime_reco_df = mc_intime_reco_df[mc_intime_reco_df.slc.is_clear_cosmic == 0]
data_reco_df = data_reco_df[data_reco_df.slc.is_clear_cosmic == 0]

draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "not_clear_cosmic")

In [66]:
data_reco_df.muon_counter.value_counts()

muon_counter
1    77397
2    42407
0    17619
3     5688
4      693
5       97
6       16
Name: count, dtype: int64

In [67]:
data_reco_df.proton_counter.value_counts()

proton_counter
0    74864
1    49173
2    15149
3     3779
4      734
5      158
6       60
Name: count, dtype: int64

In [68]:
data_reco_df.pion_counter.value_counts()

pion_counter
0    108084
1     31413
2      4019
3       354
4        47
Name: count, dtype: int64

In [69]:
opt0_score_col = ('slc', 'opt0', 'score', '', '', '')
opt0_time_col = ('slc', 'opt0', 'time', '', '', '')
nuscore_col = ('slc', 'nu_score', '', '','','')
baryc_col = ('slc', 'barycenterFM', 'flashTime', '','','')
optt0_time_col = ('slc', 'opt0', 'time', '','','')
optt0_score_col = ('slc', 'opt0', 'score', '','','')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, opt0_score_col, "Opt0 Score", "A.U.", 1., 100000., 100, "a_vtx_fv_and_not_clear_cosmic_opt0_score", True, False)
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, opt0_time_col, "Opt0 Time", "A.U.", -1.5, 6., 100, "a_vtx_fv_and_not_clear_cosmic_opt0_time", False, True)
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, nuscore_col, "$\\nu-score$", "A.U.", 0.2, 0.8, 50, "a_vtx_fv_and_not_clear_cosmic_nuscore", False, False)
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, baryc_col, "barycenter flash time [$\mu$sec]", "A.U.", -5., 5, 20, "a_vtx_fv_and_not_clear_cosmic_nuscore", False, True)
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, optt0_time_col, "optT0 time [$\mu$sec]", "A.U.", -5., 5, 20, "a_vtx_fv_and_not_clear_cosmic_optt0_time", False, True)
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, optt0_score_col, "optT0 score", "A.U.", 0., 15000, 100, "a_vtx_fv_and_not_clear_cosmic_optt0_score", False, False)

topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, opt0_score_col, "Opt0 Score", "A.U.", 1., 100000., 100, "a_vtx_fv_and_not_clear_cosmic_opt0_score", True, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, opt0_time_col, "Opt0 Time", "A.U.", -1.5, 6., 100, "a_vtx_fv_and_not_clear_cosmic_opt0_time", False, True)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, nuscore_col, "$\\nu-score$", "A.U.", 0.2, 0.8, 50, "a_vtx_fv_and_not_clear_cosmic_nuscore", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, baryc_col, "barycenter flash time [$\mu$sec]", "A.U.", -5., 5, 20, "a_vtx_fv_and_not_clear_cosmic_nuscore", False, True)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, optt0_time_col, "optT0 time [$\mu$sec]", "A.U.", -5., 5, 20, "a_vtx_fv_and_not_clear_cosmic_optt0_time", False, True)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, optt0_score_col, "optT0 score", "A.U.", 0., 15000, 100, "a_vtx_fv_and_not_clear_cosmic_optt0_score", False, False)

In [70]:
# nu_score > 0.45
mc_reco_df = mc_reco_df[(mc_reco_df.slc.nu_score > 0.45) ]
mc_intime_reco_df = mc_intime_reco_df[(mc_intime_reco_df.slc.nu_score > 0.45)]
data_reco_df = data_reco_df[(data_reco_df.slc.nu_score > 0.45)]

draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "nu_score")

In [71]:
# Copy colormap and set color for zeros
cmap = cm.get_cmap("viridis").copy()
cmap.set_under("white")   # zeros appear white

plt.hist2d(
    data_reco_df.pfp.trk.chi2pid.I2.chi2_muon, data_reco_df.pfp.trk.chi2pid.I2.chi2_proton,
    bins=[30, 50],
    range=[[0, 60], [0, 400]],
    norm=mcolors.LogNorm(vmin=1),   # log scale, zeros < 1 → "under" color
    cmap=cmap
)

plt.colorbar(label='events')
plt.xlabel(r'$\chi^2_{\mu}$')
plt.ylabel(r'$\chi^2_{p}$')
plt.title('Data Fall Val I', fontsize=16)

if show_plots:
    plt.show()
else: 
    plt.close('all')

#hist, edges = np.histogram(data_reco_df.pfp.trk.chi2pid.I2.chi2_muon, bins=np.linspace(0,50))
#plt.plot(edges[:-1], hist)
#if show_plots:
#    plt.show()
#else: 
#    plt.close('all')

plt.hist2d(
    mc_reco_df.pfp.trk.chi2pid.I2.chi2_muon, mc_reco_df.pfp.trk.chi2pid.I2.chi2_proton,
    bins=[30, 50],
    range=[[0, 60], [0, 400]],
    norm=mcolors.LogNorm(vmin=1),   # log scale, zeros < 1 → "under" color
    cmap=cmap
)

plt.colorbar(label='events')
plt.xlabel(r'$\chi^2_{\mu}$')
plt.ylabel(r'$\chi^2_{p}$')
plt.title('MC Fall Val I', fontsize=16)

if show_plots:
    plt.show()
else: 
    plt.close('all')

  cmap = cm.get_cmap("viridis").copy()


In [72]:
muon_counter_col = ('muon_counter','', '', '','','')

topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, muon_counter_col, "muon counter", "A.U.", -0.5, 5.5, 6, "a_vtx_fv_and_not_clear_cosmic_opt0_score", False, False)

In [73]:
proton_counter_col = ('proton_counter','', '', '','','')

topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, proton_counter_col, "proton counter", "A.U.", -0.5, 4.5, 5, "a_vtx_fv_and_not_clear_cosmic_opt0_score", False, False)

In [74]:
pion_counter_col = ('pion_counter','', '', '','','')

topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, pion_counter_col, "pion counter", "A.U.", -0.5, 5.5, 6, "a_vtx_fv_and_not_clear_cosmic_opt0_score", False, False)

In [75]:
#### exactly 3 pfps
mc_reco_df = pass_slc_with_n_pfps(mc_reco_df)
mc_intime_reco_df = pass_slc_with_n_pfps(mc_intime_reco_df)
data_reco_df = pass_slc_with_n_pfps(data_reco_df)

draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "three_pfps")

In [76]:
#draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "a_vtx_fv_and_not_clear_cosmic")

In [77]:
len_col = ('pfp', 'trk', 'len', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, len_col, "track length [cm]", "A.U.", 0., 250, 50, "_length", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, len_col, "track length [cm]", "A.U.", 0., 250, 50, "_length", False, False)

In [78]:
dist_col = ('pfp', 'dist_to_vertex', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, dist_col, "track-vertx distance [cm]", "A.U.", 0., 5, 5, "_length", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, dist_col, "track-vertx distance [cm]", "A.U.", 0., 5, 5, "_length", False, False)

In [79]:
## 3) n trk with len > 15 cm and vtx dist < 2 cm
#mc_reco_df = mc_reco_df[(mc_reco_df.pfp.trk.len > 15.) ]
#mc_intime_reco_df = mc_intime_reco_df[(mc_intime_reco_df.pfp.trk.len > 15.)]
#data_reco_df = data_reco_df[(data_reco_df.pfp.trk.len > 15.)]

#mc_reco_df = mc_reco_df[(mc_reco_df.pfp.trk.len > 15.) & (mc_reco_df.pfp.dist_to_vertex < 2.)]
#mc_intime_reco_df = mc_intime_reco_df[(mc_intime_reco_df.pfp.trk.len > 15.) & (mc_intime_reco_df.pfp.dist_to_vertex < 2.)]
#data_reco_df = data_reco_df[(data_reco_df.pfp.trk.len > 15.) & (data_reco_df.pfp.dist_to_vertex < 2.)]

#### exactly 3 pfps
#mc_reco_df = pass_slc_with_n_pfps(mc_reco_df)
#mc_intime_reco_df = pass_slc_with_n_pfps(mc_intime_reco_df)
#data_reco_df = pass_slc_with_n_pfps(data_reco_df)

In [80]:
draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "c_n_trk_len_and_distvtx")

In [81]:
## 4) trk score > 0.5
mc_reco_df = mc_reco_df[(mc_reco_df.pfp.trackScore > 0.5)]
mc_intime_reco_df = mc_intime_reco_df[(mc_intime_reco_df.pfp.trackScore > 0.5)]
data_reco_df = data_reco_df[(data_reco_df.pfp.trackScore > 0.5)]

#### exactly 3 pfps
mc_reco_df = pass_slc_with_n_pfps(mc_reco_df)
mc_intime_reco_df = pass_slc_with_n_pfps(mc_intime_reco_df)
data_reco_df = pass_slc_with_n_pfps(data_reco_df)

In [82]:
draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "d_n_trk_score_0p5")

In [83]:
def reco_imbalance(muon_dir_x, muon_dir_y, muon_dir_z, range_P_muon, 
                 lead_proton_dir_x, lead_proton_dir_y, lead_proton_dir_z, lead_range_P_proton,
                 rec_proton_dir_x, rec_proton_dir_y, rec_proton_dir_z, rec_range_P_proton):

    # deltapt
    deltapt_x = muon_dir_x.iloc[0] * range_P_muon.iloc[0] + lead_proton_dir_x.iloc[0] * lead_range_P_proton.iloc[0] + rec_proton_dir_x.iloc[0] * rec_range_P_proton.iloc[0]
    px_sq = np.power(deltapt_x, 2.)
    deltapt_y = muon_dir_y.iloc[0] * range_P_muon.iloc[0] + lead_proton_dir_y.iloc[0] * lead_range_P_proton.iloc[0] + rec_proton_dir_y.iloc[0] * rec_range_P_proton.iloc[0]
    py_sq = np.power(deltapt_y, 2.)
    deltapt = np.sqrt(px_sq + py_sq)

    # deltaalphat
    muon_px = muon_dir_x.iloc[0] * range_P_muon.iloc[0]
    muon_py = muon_dir_y.iloc[0] * range_P_muon.iloc[0]
    muon_pz = muon_dir_z.iloc[0] * range_P_muon.iloc[0]    
    muon_pt = np.sqrt( np.power(muon_px,2.) + np.power(muon_py,2.) ) 
    muon_p = range_P_muon.iloc[0]
    muon_costheta = muon_dir_z.iloc[0]
    deltaalphat_denom = muon_pt * deltapt
    deltaalphat_num = - ( muon_px * deltapt_x + muon_py * deltapt_y ) 
    deltaalphat = np.arccos( deltaalphat_num / deltaalphat_denom) * 180./np.pi
    
    #deltaphit
    lead_proton_x = lead_proton_dir_x.iloc[0] * lead_range_P_proton.iloc[0] 
    rec_proton_x = rec_proton_dir_x.iloc[0] * rec_range_P_proton.iloc[0]     
    proton_px = lead_proton_x + rec_proton_x
    
    lead_proton_y = lead_proton_dir_y.iloc[0] * lead_range_P_proton.iloc[0] 
    rec_proton_y = rec_proton_dir_y.iloc[0] * rec_range_P_proton.iloc[0]     
    proton_py = lead_proton_y + rec_proton_y
    
    lead_proton_z = lead_proton_dir_z.iloc[0] * lead_range_P_proton.iloc[0] 
    rec_proton_z = rec_proton_dir_z.iloc[0] * rec_range_P_proton.iloc[0]    
    proton_pz = lead_proton_z + rec_proton_z    
    lead_proton_costheta = lead_proton_dir_z.iloc[0]
    rec_proton_costheta = rec_proton_dir_z.iloc[0]    

    proton_pt = np.sqrt( np.power(proton_px,2.) + np.power(proton_py,2.) )   
    proton_p = np.sqrt( np.power(proton_px,2.) + np.power(proton_py,2.) + np.power(proton_pz,2.) )      
    lead_proton_p = np.sqrt( np.power(lead_proton_x,2.) + np.power(lead_proton_y,2.) + np.power(lead_proton_z,2.) )
    rec_proton_p = np.sqrt( np.power(rec_proton_x,2.) + np.power(rec_proton_y,2.) + np.power(rec_proton_z,2.) )
      
    deltaphit_denom = muon_pt * proton_pt
    deltaphit_num = - ( muon_px * proton_px + muon_py * proton_py )
    deltaphit = np.arccos(deltaphit_num / deltaphit_denom) * 180./np.pi
    
    # cos(theta_LR)
    costheta_lr_num = lead_proton_x * rec_proton_x + lead_proton_y * rec_proton_y + lead_proton_z * rec_proton_z
    costheta_lr_denom = lead_range_P_proton.iloc[0] * rec_range_P_proton.iloc[0]
    costheta_lr = costheta_lr_num / costheta_lr_denom
    
    # costheta_mu_sum
    costheta_mu_sum_num = muon_px * proton_px + muon_py * proton_py + muon_pz * proton_pz
    costheta_mu_sum_denom = muon_p * proton_p
    costheta_mu_sum = costheta_mu_sum_num / costheta_mu_sum_denom
        
    #e_cal
    e_mu = np.sqrt( np.power(muon_p,2.) + np.power(0.105,2) )
    e_lead_p = np.sqrt( np.power(lead_proton_p,2.) + np.power(0.938272,2) )
    ke_lead_p = e_lead_p - 0.938272
    e_rec_p = np.sqrt( np.power(rec_proton_p,2.) + np.power(0.938272,2) )
    ke_rec_p = e_rec_p - 0.938272    
    e_cal = e_mu + ke_lead_p + ke_rec_p + 0.0209 # https://link.springer.com/article/10.1140/epjc/s10052-019-6750-3
    
    #p_l
    p_l = muon_pz + proton_pz - e_cal
    
    #pn
    pn = np.sqrt( np.power(p_l,2.) + np.power(deltapt,2.) )
    pn_x = deltapt_x
    pn_y = deltapt_y
    pn_z = p_l
    
    # q (energy transfer)
    q_x = - muon_px
    q_y = - muon_py
    q_z = - muon_pz + e_cal
    q = np.sqrt( np.power(q_x,2.) + np.power(q_y,2.) + np.power(q_z,2.) )
    
    # phi_3d
    phi_3d_num = q_x * proton_px + q_y * proton_py + q_z * proton_pz
    phi_3d_denom = q * proton_p
    phi_3d = np.arccos(phi_3d_num / phi_3d_denom) * 180./np.pi
    
    #alpha_3d
    alpha_3d_num = q_x * pn_x + q_y * pn_y + q_z * pn_z
    alpha_3d_denom = q * pn
    alpha_3d = np.arccos(alpha_3d_num / alpha_3d_denom) * 180./np.pi
    
    # invariant mass
    invm = np.sqrt(np.power((e_lead_p + e_rec_p), 2) - np.power(proton_p, 2));
    
    return pd.Series({
                    'deltapt': deltapt,
                    'deltaalphat': deltaalphat,
                    'deltaphit': deltaphit,
                    'costheta_lr':costheta_lr,
                    'costheta_mu_sum':costheta_mu_sum,
                    'e_cal': e_cal,
                    'pn':pn,
                    'phi_3d': phi_3d,
                    'alpha_3d': alpha_3d,
                    'invm': invm,
                    'muon_p': muon_p,
                    'lead_proton_p': lead_proton_p,
                    'rec_proton_p': rec_proton_p,
                    'muon_costheta' : muon_costheta,
                    'lead_proton_costheta' : lead_proton_costheta,
                    'rec_proton_costheta' : rec_proton_costheta                                   
                    })

def measure_reco_imbalance(group):
    
    muons = group[group.pfp.trk.reco_pid == 13]
    protons = group[group.pfp.trk.reco_pid == 2212]    
    
    lead_protons = protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(0)
    rec_protons = protons.sort_values(by=("pfp", "trk", "len", "", "", ""), ascending=False).groupby(level=[0,1]).nth(1)    
    
    muon_dir_x = muons[('pfp', 'trk', 'dir', 'x', '', '')]
    muon_dir_y = muons[('pfp', 'trk', 'dir', 'y', '', '')]
    muon_dir_z = muons[('pfp', 'trk', 'dir', 'z', '', '')]
    muon_range = muons[('pfp', 'trk', 'rangeP', 'p_muon', '', '')]
    
    lead_proton_dir_x = lead_protons[('pfp', 'trk', 'dir', 'x', '', '')]
    lead_proton_dir_y = lead_protons[('pfp', 'trk', 'dir', 'y', '', '')]
    lead_proton_dir_z = lead_protons[('pfp', 'trk', 'dir', 'z', '', '')]
    lead_proton_range = lead_protons[('pfp', 'trk', 'rangeP', 'p_proton', '', '')]
 
    rec_proton_dir_x = rec_protons[('pfp', 'trk', 'dir', 'x', '', '')]
    rec_proton_dir_y = rec_protons[('pfp', 'trk', 'dir', 'y', '', '')]
    rec_proton_dir_z = rec_protons[('pfp', 'trk', 'dir', 'z', '', '')]
    rec_proton_range = rec_protons[('pfp', 'trk', 'rangeP', 'p_proton', '', '')]        
    
    return reco_imbalance(muon_dir_x, muon_dir_y, muon_dir_z, muon_range, 
                        lead_proton_dir_x, lead_proton_dir_y, lead_proton_dir_z, lead_proton_range,
                        rec_proton_dir_x, rec_proton_dir_y, rec_proton_dir_z, rec_proton_range)  

In [84]:
## study if track contained
cc2preco.add_contained_col(mc_reco_df)
cc2preco.add_contained_col(mc_intime_reco_df)
cc2preco.add_contained_col(data_reco_df)

In [85]:
#mc_reco_df = mc_reco_df[mc_reco_df.pfp.contained]
#mc_intime_reco_df = mc_intime_reco_df[mc_intime_reco_df.pfp.contained]
#data_reco_df = data_reco_df[data_reco_df.pfp.contained]

#### exactly 3 pfps
#mc_reco_df = pass_slc_with_n_pfps(mc_reco_df)
#mc_intime_reco_df = pass_slc_with_n_pfps(mc_intime_reco_df)
#data_reco_df = pass_slc_with_n_pfps(data_reco_df)

In [86]:
#draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "containment")

In [87]:
mc_reco_df = mc_reco_df[ ( (mc_reco_df.pfp.trk.reco_pid == 13) & (mc_reco_df.pfp.trk.rangeP.p_muon > 0.08) ) | 
                         ( (mc_reco_df.pfp.trk.reco_pid == 2212) & (mc_reco_df.pfp.trk.rangeP.p_proton > 0.31) )]
mc_intime_reco_df = mc_intime_reco_df[ ( (mc_intime_reco_df.pfp.trk.reco_pid == 13) & (mc_intime_reco_df.pfp.trk.rangeP.p_muon > 0.08) ) | 
                       ( (mc_intime_reco_df.pfp.trk.reco_pid == 2212) & (mc_intime_reco_df.pfp.trk.rangeP.p_proton > 0.31) )]
data_reco_df = data_reco_df[ ( (data_reco_df.pfp.trk.reco_pid == 13) & (data_reco_df.pfp.trk.rangeP.p_muon > 0.08) ) | 
                       ( (data_reco_df.pfp.trk.reco_pid == 2212) & (data_reco_df.pfp.trk.rangeP.p_proton > 0.31) )]

### exactly 3 pfps
mc_reco_df = pass_slc_with_n_pfps(mc_reco_df)
mc_intime_reco_df = pass_slc_with_n_pfps(mc_intime_reco_df)
data_reco_df = pass_slc_with_n_pfps(data_reco_df)

In [88]:
draw_reco_valid_plots(mc_reco_df, mc_intime_reco_df, data_reco_df, "h_trk_threshold")

In [89]:
def safe_series(df, col):
    """
    Safely extract a Series from a DataFrame.
    Returns an empty float Series if df is None, empty, or missing the column.
    """
    if df is None or df.empty or col not in df.columns:
        return pd.Series([], dtype=float)
    return df[col]

In [90]:
# List of imbalance variables to extract
reco_cols = [
    'deltapt', 'deltaalphat', 'deltaphit', 'costheta_lr', 
    'costheta_mu_sum', 'e_cal', 'pn', 'phi_3d', 
    'alpha_3d', 'invm', 'muon_p', 'lead_proton_p', 'rec_proton_p',
    'muon_costheta', 'lead_proton_costheta', 'rec_proton_costheta'
]

mc_reco_imbalance_df = mc_reco_df.groupby(['__ntuple', 'entry', 'rec.slc..index']).apply(measure_reco_imbalance)
mc_intime_reco_imbalance_df = mc_intime_reco_df.groupby(['__ntuple', 'entry', 'rec.slc..index']).apply(measure_reco_imbalance)
data_reco_imbalance_df = data_reco_df.groupby(['__ntuple', 'entry', 'rec.slc..index']).apply(measure_reco_imbalance)

# Pair each source DataFrame with its corresponding imbalance DataFrame
df_pairs = [
    (mc_reco_df, mc_reco_imbalance_df),
    (mc_intime_reco_df, mc_intime_reco_imbalance_df),
    (data_reco_df, data_reco_imbalance_df)
]

# Assign safe_series for all columns in all DataFrames
for df, imbalance_df in df_pairs:
    for col in reco_cols:
        df[(f'reco_{col}', '', '', '', '', '')] = safe_series(imbalance_df, col)

In [91]:
deltapt_col = ('reco_deltapt', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltapt_col, "$\\delta p_{T}$ [GeV/c]", "A.U.", 0., 1., 20, "_deltapt", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltapt_col, "$\\delta p_{T}$ [GeV/c]", "A.U.", 0., 1., 20, "_deltapt", False, False)

In [92]:
deltaalphat_col = ('reco_deltaalphat', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltaalphat_col, "$\\delta\\alpha_{T}$ [deg]", "A.U.", 0., 180., 12, "_deltaalphat", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltaalphat_col, "$\\delta\\alpha_{T}$ [deg]", "A.U.", 0., 180., 12, "_deltaalphat", False, False)

In [93]:
deltaphit_col = ('reco_deltaphit', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltaphit_col, "$\\delta\\phi_{T}$ [deg]", "A.U.", 0., 180., 12, "_deltaphit", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, deltaphit_col, "$\\delta\\phi_{T}$ [deg]", "A.U.", 0., 180., 12, "_deltaphit", False, False)

In [94]:
costheta_lr_col = ('reco_costheta_lr', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, costheta_lr_col, "cos$\\theta_{L,R}$", "A.U.", -1, 1., 20, "_costheta_lr", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, costheta_lr_col, "cos$\\theta_{L,R}$", "A.U.", -1, 1., 20, "_costheta_lr", False, False)

In [95]:
costheta_mu_sum_col = ('reco_costheta_mu_sum', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, costheta_mu_sum_col, "cos$\\theta_{\\mu,sum}$", "A.U.", -1, 1., 20, "_costheta_mu_sum", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, costheta_mu_sum_col, "cos$\\theta_{\\mu,sum}$", "A.U.", -1, 1., 20, "_costheta_mu_sum", False, False)

In [96]:
e_cal_col = ('reco_e_cal', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, e_cal_col, "E$_{cal}$ [GeV]", "A.U.", 0.25, 1.5, 20, "_e_cal", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, e_cal_col, "E$_{cal}$ [GeV]", "A.U.", 0.25, 1.5, 20, "_e_cal", False, False)

In [97]:
pn_col = ('reco_pn', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, pn_col, "$p_{n}$ [GeV/c]", "A.U.", 0., 1., 20, "_pn", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, pn_col, "$p_{n}$ [GeV/c]", "A.U.", 0., 1., 20, "_pn", False, False)

In [98]:
phi_3d_col = ('reco_phi_3d', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, phi_3d_col, "$\\phi_{3D}$ [deg]", "A.U.", 0., 180., 30, "_phi_3d", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, phi_3d_col, "$\\phi_{3D}$ [deg]", "A.U.", 0., 180., 30, "_phi_3d", False, False)

In [99]:
alpha_3d_col = ('reco_alpha_3d', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, alpha_3d_col, "$\\alpha_{3D}$ [deg]", "A.U.", 0., 180., 30, "_alpha_3d", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, alpha_3d_col, "$\\alpha_{3D}$ [deg]", "A.U.", 0., 180., 30, "_alpha_3d", False, False)

In [100]:
invm_col = ('reco_invm', '', '', '', '', '')
draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, invm_col, "W [GeV/$c^{2}$]", "A.U.", 1.8, 2.5, 30, "_invm", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, invm_col, "W [GeV/$c^{2}$]", "A.U.", 1.8, 2.5, 30, "_invm", False, False)

In [101]:
muon_p_col = ('reco_muon_p', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, muon_p_col, "$p_{\mu}$ [GeV/c]", "A.U.", 0.05, 1.2, 15, "_muon_mom", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, muon_p_col, "$p_{\mu}$ [GeV/c]", "A.U.", 0.05, 1.2, 15, "_muon_mom", False, False)

In [102]:
lead_proton_p_col = ('reco_lead_proton_p', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, lead_proton_p_col, "leading $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, "_lead_proton_mom", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, lead_proton_p_col, "leading $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, "_lead_proton_mom", False, False)

In [103]:
rec_proton_p_col = ('reco_rec_proton_p', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, rec_proton_p_col, "recoil $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, "_rec_proton_mom", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, rec_proton_p_col, "recoil $p_{p}$ [GeV/c]", "A.U.", 0.3, 1.2, 15, "_rec_proton_mom", False, False)

In [104]:
muon_costheta_col = ('reco_muon_costheta', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, muon_costheta_col, "$cos\\theta_{\mu}$", "A.U.", -1., 1., 15, "_muon_costheta", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, muon_costheta_col, "$cos\\theta_{\mu}$", "A.U.", -1., 1, 15, "_muon_costheta", False, False)

In [105]:
lead_proton_costheta_col = ('reco_lead_proton_costheta', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, lead_proton_costheta_col, "leading $cos\\theta_{p}$", "A.U.", -1., 1., 15, "_lead_proton_costheta", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, lead_proton_costheta_col, "leading $cos\\theta_{p}$", "A.U.", -1., 1, 15, "_lead_proton_costheta", False, False)

In [106]:
rec_proton_costheta_col = ('reco_rec_proton_costheta', '', '', '', '', '')

draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, rec_proton_costheta_col, "recoil $cos\\theta_{p}$", "A.U.", -1., 1., 15, "_rec_proton_costheta", False, False)
topo_draw_mc_data_shape_comp_per_slc(mc_reco_df, mc_intime_reco_df, data_reco_df, rec_proton_costheta_col, "recoil $cos\\theta_{p}$", "A.U.", -1., 1, 15, "_rec_proton_costheta", False, False)

In [107]:
# Copy colormap and set color for zeros
cmap = cm.get_cmap("viridis").copy()
cmap.set_under("white")   # zeros appear white

plt.hist2d(
    data_reco_df.pfp.trk.chi2pid.I2.chi2_muon, data_reco_df.pfp.trk.chi2pid.I2.chi2_proton,
    bins=[30, 50],
    range=[[0, 60], [0, 400]],
    norm=mcolors.LogNorm(vmin=1),   # log scale, zeros < 1 → "under" color
    cmap=cmap
)

plt.colorbar(label='events')
plt.xlabel(r'$\chi^2_{\mu}$')
plt.ylabel(r'$\chi^2_{p}$')
plt.title('Data Fall Val I', fontsize=16)

if show_plots:
    plt.show()
else: 
    plt.close('all')

#hist, edges = np.histogram(data_reco_df.pfp.trk.chi2pid.I2.chi2_muon, bins=np.linspace(0,50))
#plt.plot(edges[:-1], hist)
#if show_plots:
#    plt.show()
#else: 
#    plt.close('all')

plt.hist2d(
    mc_reco_df.pfp.trk.chi2pid.I2.chi2_muon, mc_reco_df.pfp.trk.chi2pid.I2.chi2_proton,
    bins=[30, 50],
    range=[[0, 60], [0, 400]],
    norm=mcolors.LogNorm(vmin=1),   # log scale, zeros < 1 → "under" color
    cmap=cmap
)

plt.colorbar(label='events')
plt.xlabel(r'$\chi^2_{\mu}$')
plt.ylabel(r'$\chi^2_{p}$')
plt.title('MC Fall Val I', fontsize=16)

if show_plots:
    plt.show()
else: 
    plt.close('all')

  cmap = cm.get_cmap("viridis").copy()


# Store dfs with variables of interest

In [108]:
data_reco_df.to_pickle(f"/exp/sbnd/data/users/{user}/dfs/{code_version}/data_MCP2025B_02_DevSample_bnblight_v10_06_00_02_flatcaf_sbnd_cc2p.df")
mc_reco_df.to_pickle(f"/exp/sbnd/data/users/{user}/dfs/{code_version}/mc_MCP2025B_5e18_02_prodgenie_corsika_proton_rockbox_sbnd_CV_caf_flat_caf_sbnd_cc2p.df")
mc_intime_reco_df.to_pickle(f"/exp/sbnd/data/users/{user}/dfs/{code_version}/mc_MCP2025B_5e18_02_prodcorsika_proton_intime_sbnd_CV_caf_flat_caf_sbnd_100files_cc2p.df")

# Make MCstat uncertainty universes

In [109]:
def get_MCstat_unc(evt_df, hdr_df, n_universes=100, poisson_mean=1.0):

    # --- Extract slice info ---
    runno    = evt_df[("slc", "self")].index.get_level_values(0).values
    subrunno = evt_df[("slc", "self")].index.get_level_values(1).values
    evtno    = evt_df[("slc", "self")].index.get_level_values(2).values
    slcid    = evt_df[("slc", "self")].values

    n_events = len(evt_df)
    
    # --- Generate MCstat universes ---
    MCstat_univ_events = np.random.poisson(lam=poisson_mean, size=(n_events, n_universes))

    # --- Create base MultiIndex for columns (2 levels) ---
    base_cols = pd.MultiIndex.from_product(
        [["MCstat"], [f"univ_{i}" for i in range(n_universes)]]
    )

    # --- Pad columns to match evt_df column levels ---
    n_levels = evt_df.columns.nlevels
    if n_levels > 2:
        padded_cols = []
        for col in base_cols:
            # fill missing levels with empty strings
            padded_col = col + ("",) * (n_levels - len(col))
            padded_cols.append(padded_col)
        mcstat_univ_cols_padded = pd.MultiIndex.from_tuples(padded_cols, names=evt_df.columns.names)
    else:
        mcstat_univ_cols_padded = base_cols

    # --- Create DataFrame ---
    mcstat_univ_wgt = pd.DataFrame(
        MCstat_univ_events,
        index=evt_df.index,
        columns=mcstat_univ_cols_padded
    )

    # --- Drop existing MCstat columns if any ---
    evt_df = evt_df.drop(columns=[col for col in evt_df.columns if col[0] == "MCstat"], errors='ignore')

    # --- Join safely ---
    evt_df = evt_df.join(mcstat_univ_wgt, how="left")

    return evt_df, MCstat_univ_events

In [110]:
mc_reco_df, MCstat_univ_events = get_MCstat_unc(mc_reco_df, mc_hdr_df, n_universes=100)

  runno    = evt_df[("slc", "self")].index.get_level_values(0).values
  subrunno = evt_df[("slc", "self")].index.get_level_values(1).values
  evtno    = evt_df[("slc", "self")].index.get_level_values(2).values
  slcid    = evt_df[("slc", "self")].values


# POT

In [111]:
## total pot
mc_tot_pot = mc_hdr_df['pot'].sum()
print("mc_tot_pot: %.3e" %(mc_tot_pot))

target_pot = 1e20
mc_pot_scale = target_pot / mc_tot_pot
print("mc_pot_scale: %.3e" %(mc_pot_scale))
mc_pot_scale = 1.

mc_reco_df["pot_weight"] = mc_pot_scale * np.ones(len(mc_reco_df))

mc_tot_pot: 2.919e+18
mc_pot_scale: 3.426e+01


# Constants

In [112]:
# flux file, units: /m^2/10^6 POT 
# 50 MeV bins
fluxfile = "/exp/sbnd/data/users/apapadop/flux/sbnd_original_flux.root"
flux = uproot.open(fluxfile)
print(flux.keys())

# numu flux
numu_flux = flux["flux_sbnd_numu"].to_numpy()
bin_edges = numu_flux[1]
flux_vals = numu_flux[0]

plt.hist(bin_edges[:-1], bins=bin_edges, weights=flux_vals, histtype="step")
plt.xlabel("E [GeV]")
plt.ylabel("Flux [/m$^{2}$/10$^{6}$ POT]")
plt.title("SBND $\\nu_\\mu$ Flux")
plt.xlim(0, 3)  # Limit x-axis from 0 to 2 GeV
if show_plots:
    plt.show()
else: 
    plt.close('all')

# get integrated flux
integrated_flux = flux_vals.sum()
integrated_flux /= 1e4 # to cm2
INTEGRATED_FLUX = integrated_flux * mc_tot_pot / 1e6 # POT
print("Integrated flux: %.3e" % INTEGRATED_FLUX)

['flux_sbnd_numu;1', 'flux_sbnd_nue;1', 'flux_sbnd_anumu;1', 'flux_sbnd_anue;1']
Integrated flux: 4.714e+10


In [113]:
RHO = 1.3836  #g/cm3, liquid Ar density
N_A = 6.02214076e23 # Avogadro’s number
M_AR = 39.95 # g, molar mass of argon
V_SBND = 380 * 380 * 440 # cm3, the active volume of the detector 
NTARGETS = RHO * V_SBND * N_A / M_AR
print("# of targets: ", NTARGETS)

# of targets:  1.3251484770937053e+30


In [114]:
# set to 1 for event rates
XSEC_UNIT = 1 / (INTEGRATED_FLUX * NTARGETS)

# XSEC_UNIT = 1
print("xsec unit: ", XSEC_UNIT)

xsec unit:  1.6007688898989545e-41


In [115]:
# variable to unfold

class VariableConfig:
    """
    A configurable class for setting up unfolding variable configurations.
    Choose a configuration using one of the provided class methods,
    or instantiate directly with custom parameters.
    """
    def __init__(self, var_save_name, var_plot_name, var_unit, bins, var_evt_reco_col, var_evt_truth_col, var_nu_col):
        self.var_save_name = var_save_name
        self.var_plot_name = var_plot_name
        self.var_unit = var_unit
        unit_suffix = f"~[{var_unit}]" if len(var_unit) > 0 else ""
        self.var_labels = [r"$\mathrm{" + var_plot_name + unit_suffix + "}$", 
                           r"$\mathrm{" + var_plot_name + "^{reco.}" + unit_suffix + "}$", 
                           r"$\mathrm{" + var_plot_name + "^{true}" + unit_suffix + "}$"]
        self.bins = bins
        self.bin_centers = (bins[:-1] + bins[1:]) / 2.
        self.var_evt_reco_col = var_evt_reco_col
        self.var_evt_truth_col = var_evt_truth_col
        self.var_nu_col = var_nu_col


    @classmethod
    def muon_momentum(cls):
        return cls(
            var_save_name="muon-p",
            var_plot_name="P_\mu",
            var_unit="GeV/c",
            bins=np.linspace(0.15, 1.2, 11),
            var_evt_reco_col=('mu', 'pfp', 'trk', 'P', 'p_muon', '', '', ''),
            var_evt_truth_col=('mu', 'pfp', 'trk', 'truth', 'p', 'totp', '', ''),
            var_nu_col=('mu', 'totp', '')
        )

    @classmethod
    def muon_direction(cls):
        return cls(
            var_save_name="muon-dir_z",
            var_plot_name="cos(\theta_\mu)",
            var_unit="",
            bins=np.linspace(-1, 1, 6),
            var_evt_reco_col=('mu', 'pfp', 'trk', 'dir', 'z', '', '', ''),
            var_evt_truth_col=('mu', 'pfp', 'trk', 'truth', 'p', 'dir', 'z', ''),
            var_nu_col=('mu', 'dir', 'z')
        )

    @classmethod
    def proton_momentum(cls):
        return cls(
            var_save_name="proton-p",
            var_plot_name="P_p",
            var_unit="GeV/c",
            bins=np.linspace(0.2, 2, 6),
            var_evt_reco_col=('p', 'pfp', 'trk', 'P', 'p_proton', '', '', ''),
            var_evt_truth_col=('p', 'pfp', 'trk', 'truth', 'p', 'totp', '', ''),
            var_nu_col=('p', 'totp', '')
        )

    @classmethod
    def proton_direction(cls):
        return cls(
            var_save_name="proton-dir_z",
            var_plot_name="cos(\theta_p)",
            var_unit="",
            bins=np.linspace(-1, 1, 6),
            var_evt_reco_col=('p', 'pfp', 'trk', 'dir', 'z', '', '', ''),
            var_evt_truth_col=('p', 'pfp', 'trk', 'truth', 'p', 'dir', 'z', ''),
            var_nu_col=('p', 'dir', 'z')
        )

var_config = VariableConfig.muon_momentum()

# Make dfs for analysis

np.clip is for including underflow events into the first bin and overflow events into the last bin

In [189]:
# Total MC reco muon momentum: for fake data
eps = 1e-8
var_total_mc = mc_reco_df[var_config.var_evt_reco_col]
var_total_mc = np.clip(var_total_mc, var_config.bins[0], var_config.bins[-1] - eps)
weights_total_mc = mc_reco_df.loc[:, 'pot_weight']

# --- all events, selected ---
# mc_reco_df divided into topology modes for subtraction from data in future
# first item in list is the signal topology
mc_reco_df_divided = [mc_reco_df[mc_reco_df.nuint_categ == mode]for mode in topo_mode_list]

# Reco variable distribution for each 'nuint_categ' for stack plot and subtraction from the fake data
var_per_nuint_categ_mc = [mc_reco_df[mc_reco_df.nuint_categ == mode][var_config.var_evt_reco_col]for mode in topo_mode_list]
var_per_nuint_categ_mc = [s.clip(var_config.bins[0], var_config.bins[-1] - eps) for s in var_per_nuint_categ_mc]
weights_per_categ = [mc_reco_df.loc[mc_reco_df.nuint_categ == mode, 'pot_weight'] for mode in topo_mode_list]

# Reco variable distribution for each genie mode
var_per_genie_mode_mc = [mc_reco_df[mc_reco_df.genie_mode == mode][var_config.var_evt_reco_col]for mode in mode_list]
var_per_genie_mode_mc = [s.clip(var_config.bins[0], var_config.bins[-1] - eps) for s in var_per_genie_mode_mc]
weights_per_genie_mode = [mc_reco_df.loc[mc_reco_df.genie_mode == mode, 'pot_weight'] for mode in mode_list]


# --- signal events ---
# selected, for response matrix
# Signal event's reco muon momentum after the event selection
var_signal_sel_reco = mc_reco_df[mc_reco_df.nuint_categ == 1][var_config.var_evt_reco_col]
var_signal_sel_reco = np.clip(var_signal_sel_reco, var_config.bins[0], var_config.bins[-1] - eps)
weight_signal = mc_reco_df.loc[mc_reco_df.nuint_categ == 1, 'pot_weight']

# Signal event's true muon momentum after the event selection
var_signal_sel_truth = mc_reco_df[mc_reco_df.nuint_categ == 1][var_config.var_evt_truth_col]
var_signal_sel_truth = np.clip(var_signal_sel_truth, var_config.bins[0], var_config.bins[-1] - eps)
weight_true_signal = mc_reco_df.loc[mc_reco_df.nuint_categ == 1, 'pot_weight']

# Signal event's true muon momentum without event selection
var_truth_signal = mc_nudf[mc_nudf.nuint_categ == 1][var_config.var_nu_col]
var_truth_signal = np.clip(var_truth_signal, var_config.bins[0], var_config.bins[-1] - eps)
weight_truth_signal = np.full_like(var_truth_signal, mc_pot_scale, dtype=float)

KeyError: 'Key length (8) exceeds index depth (6)'