In [None]:
## In this example, we will study output data frame from pandora.py configuration
#### 1. Opening each data frame and check structure
#### 2. Collect POT and scale factor to the target POT
#### 3. Merge evtdf and mcnudf for further study
#### 4. Draw some plots for each slice and for each pfp

import os
import sys

import numpy as np
import math
import uproot as uproot
import pickle
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import ticker
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from matplotlib import gridspec

# Add the head directory (two levels above the working directory) to sys.path
workspace_root = os.getcwd()  
sys.path.insert(0, workspace_root + "/../../")

# import this repo's classes
import pyanalib.pandas_helpers as ph
import pyanalib.split_df_helpers as splh
import pyanalib.stat_helpers as sh

np.seterr(divide='ignore', invalid='ignore', over='ignore')

In [None]:
sample_str = "v10_06_00_05"

## Open Dataframes

In [None]:
## df files to open
# All paths are relative to the notebook's working directory and point two
# levels up, i.e. the same directory that was added to sys.path above.
# MC: BNB + cosmic sample
mc_bnb_cosmic_file = "../../mc_MCP2025B_1e20_05_prodgenie_corsika_proton_rockbox_sbnd_CV_caf_flat_caf_sbnd.df"
# MC: low-threshold rockbox sample (labelled "Rockbox Th. 1 - 100 MeV" in the plots below)
mc_rockbox_th1to100_file = "../../mc_MCP2025B_prodgenie_corsika_proton_rockbox_lowenergydirt_sbnd_CV_caf_flat_caf_sbnd.df"
# Data: off-beam + light sample
data_offbeam_light_file = "../../data_MCP2025B_02_InTimeCosmics_offbeamlight_v10_06_00_05_flatcaf_sbnd.df"
# Data: BNB + light development sample
data_bnb_light_dev_file = "../../data_MCP2025B_05_DevSample_bnblight_v10_06_00_05_flatcaf_sbnd.df"

In [None]:
## Check keys in each file
print("keys in mc_bnb_cosmic_file")
splh.print_keys(mc_bnb_cosmic_file)

print("keys in data_bnb_light_dev_file")
splh.print_keys(data_bnb_light_dev_file)

In [None]:
## Check split multiplicity
print("mc_bnb_cosmic_file n_split: %d" %splh.get_n_split(mc_bnb_cosmic_file))
print("data_bnb_light_dev_file n_split: %d" %splh.get_n_split(data_bnb_light_dev_file))

In [None]:
## Define keys to load
# n_max_concat is forwarded to splh.load_dfs for every sample.
# NOTE(review): presumably the maximum number of splits concatenated per key — confirm in split_df_helpers.
n_max_concat = 2 ## for big files, each key would have more than one split
# The same key list is used for MC and data files; 'mcnu' is only read back
# from the two MC samples in the visible part of this notebook.
keys2load = ['opflash', 'hdr', 'pot', 'evt', 'mcnu']

In [None]:
## Load DataFrames
# Each call returns a container indexed by key name (e.g. ['evt'], ['hdr']),
# as used throughout the rest of the notebook.
mc_bnb_cosmic_dfs = splh.load_dfs(mc_bnb_cosmic_file, keys2load, n_max_concat)
mc_rockbox_th1to100_dfs = splh.load_dfs(mc_rockbox_th1to100_file, keys2load, n_max_concat)
data_offbeam_light_dfs = splh.load_dfs(data_offbeam_light_file, keys2load, n_max_concat)
data_bnb_light_dfs = splh.load_dfs(data_bnb_light_dev_file, keys2load, n_max_concat)

## Check DataFrames

In [None]:
mc_bnb_cosmic_dfs['evt']

In [None]:
mc_bnb_cosmic_dfs['evt'].columns

## Check duplicated (run, subrun, evt) and filter

Sometimes there are duplicated (run, subrun, evt) combinations. This section checks for them and, if any are found, filters them out.

### Define functions for duplication validation and filtering

In [None]:
def find_duplicate_run_evt_combinations(df):
    """
    Print every row whose (run, subrun, evt) combination occurs more than once.

    The index levels are moved into ordinary columns first so that the
    (__ntuple, entry) labels of each duplicated row show up in the printout.
    Rows are sorted by (run, subrun, evt) so that repeated events sit next to
    each other.

    Parameters:
        df (pd.DataFrame): Input DataFrame whose index provides '__ntuple' and 'entry'
                           and which has the columns 'run', 'subrun' and 'evt'.

    Returns:
        None: The duplicated rows are only printed, not returned.
    """
    event_id_cols = ['run', 'subrun', 'evt']

    # Expose the index levels as regular columns
    flat_df = df.reset_index()

    # keep=False flags every member of a duplicated group, not only the repeats
    is_repeated = flat_df.duplicated(subset=event_id_cols, keep=False)

    # Select the duplicated rows and order them for readability
    repeated_rows = flat_df[is_repeated].sort_values(by=event_id_cols)

    print(repeated_rows[['__ntuple', 'entry'] + event_id_cols])

def plot_duplicate_run_subrun_evt_distribution(df, title=""):
    """
    Plot how often each (run, subrun, evt) combination appears in df.

    The x axis is the multiplicity (1 = unique, 2 = appears twice, ...) and the
    y axis (log scale) is the number of (run, subrun, evt) combinations with
    that multiplicity.

    Parameters:
        df (pd.DataFrame): Input DataFrame with columns including 'run', 'subrun', and 'evt'
                           (the index is reset before counting).
        title (str): Title for the plot (optional).

    Returns:
        None: The figure is shown with plt.show(); nothing is returned.
    """
    # How many rows share each (run, subrun, evt) key
    occurrences = df.reset_index().groupby(['run', 'subrun', 'evt']).size()

    # Histogram of those multiplicities, ordered by multiplicity
    multiplicity_hist = occurrences.value_counts().sort_index()

    fig, ax = plt.subplots(figsize=(8, 5))
    multiplicity_hist.plot(kind='bar', log=True, ax=ax)
    ax.set_xlabel('Number of times (run, subrun, evt) appears')
    ax.set_ylabel('Entries (spills)')
    ax.set_title(title)
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    fig.tight_layout()
    plt.show()

def filter_unique_events(df):
    """
    Drop every row whose (run, subrun, evt) combination is not unique.

    All members of a duplicated group are removed (none of them is kept).
    The returned frame carries the same index levels as the input.

    Parameters:
        df (pd.DataFrame): Input DataFrame with columns 'run', 'subrun' and 'evt'.

    Returns:
        pd.DataFrame: Rows of df whose (run, subrun, evt) appears exactly once.
    """
    # Index levels become columns so they survive the filtering and can be restored
    flat_df = df.reset_index()

    # keep=False marks *all* occurrences of a repeated key
    is_repeated = flat_df.duplicated(subset=['run', 'subrun', 'evt'], keep=False)

    # Retain the non-repeated rows and rebuild the original index
    return flat_df[~is_repeated].set_index(df.index.names)

def filter_using_hdr(df, hdrdf):
    """
    Keep only the rows of df whose (__ntuple, entry) pair is present in hdrdf's index.

    Parameters:
        df (pd.DataFrame): Frame whose index has at least the levels '__ntuple'
                           and 'entry' (additional levels are allowed and ignored
                           for the matching).
        hdrdf (pd.DataFrame): Reference frame; its index entries are the allowed
                              (__ntuple, entry) keys.

    Returns:
        pd.DataFrame: The rows of df whose event key is found in hdrdf.index.
                      An empty df yields an empty frame with its columns intact.
    """
    # Build the per-row event key from the two named levels only
    event_keys = pd.MultiIndex.from_arrays(
        [df.index.get_level_values('__ntuple'), df.index.get_level_values('entry')]
    )

    # Vectorized membership test; a boolean ndarray (unlike an empty Python list)
    # is always interpreted as a row mask, never as a column selection.
    mask = np.asarray(event_keys.isin(hdrdf.index), dtype=bool)

    return df.loc[mask]

### Perform duplication validation

In [None]:
print("duplication for MC BNB + Cosmic sample")
find_duplicate_run_evt_combinations(mc_bnb_cosmic_dfs["hdr"])

print("\n")
print("duplication for low th. MC BNB + Cosmic sample")
find_duplicate_run_evt_combinations(mc_rockbox_th1to100_dfs["hdr"])

print("\n")
print("duplication for Off-beam + light sample")
find_duplicate_run_evt_combinations(data_offbeam_light_dfs["hdr"])

print("\n")
print("duplication for BNB + light sample")
find_duplicate_run_evt_combinations(data_bnb_light_dfs["hdr"])

In [None]:
plot_duplicate_run_subrun_evt_distribution(mc_bnb_cosmic_dfs["hdr"], "BNB + Cosmic MC")
plot_duplicate_run_subrun_evt_distribution(mc_rockbox_th1to100_dfs["hdr"], "Low Th. BNB + Cosmic MC")
plot_duplicate_run_subrun_evt_distribution(data_offbeam_light_dfs["hdr"], "Off-beam + Light Data")
plot_duplicate_run_subrun_evt_distribution(data_bnb_light_dfs["hdr"], "BNB + Light Data")

### Perform filtering

In [None]:
### Filter the hdr DataFrame first, then filter other DataFrames by matching with the hdr DataFrame
mc_bnb_cosmic_dfs["hdr"] = filter_unique_events(mc_bnb_cosmic_dfs["hdr"])
mc_rockbox_th1to100_dfs["hdr"] = filter_unique_events(mc_rockbox_th1to100_dfs["hdr"])
data_offbeam_light_dfs["hdr"] = filter_unique_events(data_offbeam_light_dfs["hdr"])
data_bnb_light_dfs["hdr"] = filter_unique_events(data_bnb_light_dfs["hdr"])

In [None]:
## Double check if it is filtered
find_duplicate_run_evt_combinations(mc_bnb_cosmic_dfs["hdr"])
find_duplicate_run_evt_combinations(mc_rockbox_th1to100_dfs["hdr"])
find_duplicate_run_evt_combinations(data_offbeam_light_dfs["hdr"])
find_duplicate_run_evt_combinations(data_bnb_light_dfs["hdr"])

In [None]:
# Propagate the duplicate removal done on each hdr frame to the other frames of
# the same sample: a row survives only if its (__ntuple, entry) is still in hdr.

# MC BNB + cosmic
mc_bnb_cosmic_dfs["evt"] = filter_using_hdr(mc_bnb_cosmic_dfs["evt"], mc_bnb_cosmic_dfs["hdr"])
mc_bnb_cosmic_dfs["opflash"] = filter_using_hdr(mc_bnb_cosmic_dfs["opflash"], mc_bnb_cosmic_dfs["hdr"])
mc_bnb_cosmic_dfs["pot"] = filter_using_hdr(mc_bnb_cosmic_dfs["pot"], mc_bnb_cosmic_dfs["hdr"])
mc_bnb_cosmic_dfs["mcnu"] = filter_using_hdr(mc_bnb_cosmic_dfs["mcnu"], mc_bnb_cosmic_dfs["hdr"])

# MC low-threshold rockbox
mc_rockbox_th1to100_dfs["evt"] = filter_using_hdr(mc_rockbox_th1to100_dfs["evt"], mc_rockbox_th1to100_dfs["hdr"])
mc_rockbox_th1to100_dfs["opflash"] = filter_using_hdr(mc_rockbox_th1to100_dfs["opflash"], mc_rockbox_th1to100_dfs["hdr"])
mc_rockbox_th1to100_dfs["pot"] = filter_using_hdr(mc_rockbox_th1to100_dfs["pot"], mc_rockbox_th1to100_dfs["hdr"])
mc_rockbox_th1to100_dfs["mcnu"] = filter_using_hdr(mc_rockbox_th1to100_dfs["mcnu"], mc_rockbox_th1to100_dfs["hdr"])

# Off-beam data (the 'mcnu' frame is not filtered for data samples)
data_offbeam_light_dfs["evt"] = filter_using_hdr(data_offbeam_light_dfs["evt"], data_offbeam_light_dfs["hdr"])
data_offbeam_light_dfs["opflash"] = filter_using_hdr(data_offbeam_light_dfs["opflash"], data_offbeam_light_dfs["hdr"])
data_offbeam_light_dfs["pot"] = filter_using_hdr(data_offbeam_light_dfs["pot"], data_offbeam_light_dfs["hdr"])

# BNB data (the 'mcnu' frame is not filtered for data samples)
data_bnb_light_dfs["evt"] = filter_using_hdr(data_bnb_light_dfs["evt"], data_bnb_light_dfs["hdr"])
data_bnb_light_dfs["opflash"] = filter_using_hdr(data_bnb_light_dfs["opflash"], data_bnb_light_dfs["hdr"])
data_bnb_light_dfs["pot"] = filter_using_hdr(data_bnb_light_dfs["pot"], data_bnb_light_dfs["hdr"])

## Check Normalization

In [None]:
def get_n_evt(df):
    """
    Count the distinct combinations of the first two index levels of df.

    Every index level beyond the second is dropped before counting, so frames
    with deeper indices are counted per leading-level pair rather than per row.

    Parameters:
        df (pd.DataFrame): Frame with a MultiIndex of at least two levels.

    Returns:
        int: Number of unique (level 0, level 1) index pairs.
    """
    # Names of all levels after the first two (empty when the index has two levels)
    trailing_levels = list(df.index.names[2:])
    leading_pairs = df.index.droplevel(trailing_levels)
    return leading_pairs.nunique()

In [None]:
## Collect the offbeam data fudge factor and scale for offbeam data
# Recorded spills = number of unique (first two index levels) entries in hdr
n_record_spill_data = get_n_evt(data_bnb_light_dfs['hdr'])
# Number of on-beam gates is taken as the number of rows of the 'pot' frame
n_gates_data = len(data_bnb_light_dfs["pot"])

n_record_spill_offbeam_data = get_n_evt(data_offbeam_light_dfs['hdr'])
# Off-beam gates: 'noffbeambnb' summed over the rows flagged first_in_subrun == 1.
# NOTE(review): hdr has already been through filter_unique_events at this point; if a
# first_in_subrun row was among the removed duplicates its gate count is lost — confirm
# this is acceptable.
n_gates_offbeam_data = data_offbeam_light_dfs["hdr"][data_offbeam_light_dfs["hdr"]['first_in_subrun'] == 1]['noffbeambnb'].sum()

# Fraction of gates that ended up as a recorded spill, for on-beam and off-beam data
p_trig_data = n_record_spill_data / n_gates_data
p_trig_offbeam_data = n_record_spill_offbeam_data / n_gates_offbeam_data

# f_factor = (p_on - p_off) / (1 - p_off)
f_factor = (p_trig_data - p_trig_offbeam_data) / (1 - p_trig_offbeam_data)
print("f_factor: %f" %f_factor)

# Scale applied to off-beam entries: (1 - f_factor) * N_gates(on-beam) / N_gates(off-beam).
# The "+ 0." terms only force floating-point arithmetic.
intime_gate_scale = (1. - f_factor) * (n_gates_data + 0.) / (n_gates_offbeam_data + 0.)
print("intime_gate_scale: %f" %intime_gate_scale)


In [None]:
## Collect pot scale for MC
# Total POT of each sample is the sum of the hdr 'pot' column
mc_tot_pot = mc_bnb_cosmic_dfs["hdr"]['pot'].sum()
mc_low_th_tot_pot = mc_rockbox_th1to100_dfs["hdr"]['pot'].sum()

data_tot_pot = data_bnb_light_dfs["hdr"]['pot'].sum()
# TOR860 / TOR875 sums from the 'pot' frame are printed for comparison only;
# they are not used in the scale factors below.
data_tot_TOR860 = data_bnb_light_dfs["pot"]['TOR860'].sum()
data_tot_TOR875 = data_bnb_light_dfs["pot"]['TOR875'].sum()

print("mc_tot_pot: %e" %(mc_tot_pot))
print("mc_low_thtot_pot: %e" %(mc_low_th_tot_pot))

print("data_tot_pot: %e" %(data_tot_pot))
print("data_tot_TOR860: %e" %(data_tot_TOR860))
print("data_tot_TOR875: %e" %(data_tot_TOR875))

# Both MC samples are normalized to the data POT taken from hdr
target_pot = data_tot_pot
mc_pot_scale = target_pot / mc_tot_pot
mc_low_th_scale = target_pot / mc_low_th_tot_pot
print("MC POT scale: %.3f" %(mc_pot_scale))
print("MC Low Th. POT scale: %.3f" %(mc_low_th_scale))


In [None]:
## Comparison between observed and expected total number of recorded spills
n_evt_mc = get_n_evt(mc_bnb_cosmic_dfs["hdr"])
n_evt_mc_low_th = get_n_evt(mc_rockbox_th1to100_dfs["hdr"])

# Expectation = POT-scaled MC + POT-scaled low-threshold MC + gate-scaled off-beam data;
# the three individual contributions are printed underneath the total.
print("n_evt_data_onbeam: %d" %n_record_spill_data)
print("n_evt_exp.: %f" %(n_evt_mc * mc_pot_scale + n_evt_mc_low_th * mc_low_th_scale +n_record_spill_offbeam_data * intime_gate_scale))
print("- n_evt_mc: %f" %(n_evt_mc * mc_pot_scale))
print("- n_evt_mc_low_th: %f" %(n_evt_mc_low_th * mc_low_th_scale))
print("- n_evt_data_offbeam: %f" %(n_record_spill_offbeam_data * intime_gate_scale))

## Analyzing OpFlash DataFrame and Collect PE cut Decision

### Check OpFlash distributions

In [None]:
def draw_a_distribution(df, col, title_x, x_min, x_max, nbins, title_y = "", is_logx = False, is_logy = False, label_top = ""):
    """
    Draw a single unweighted step histogram of one column of df.

    Parameters:
        df (pd.DataFrame): Source frame.
        col: Column key of df to histogram.
        title_x (str): x-axis label.
        x_min, x_max (float): Histogram range; both must be > 0 when is_logx is True
                              because the bin edges are then built with np.log10.
        nbins (int): Number of bins.
        title_y (str): y-axis label.
        is_logx (bool): Use logarithmically spaced bins and a logarithmic x axis.
        is_logy (bool): Use a logarithmic y axis.
        label_top (str): Text written above the top-right corner of the axes.

    Returns:
        None: The figure is shown with plt.show().
    """
    var = df[col]
    if is_logx:
        bins = np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1)
    else:
        bins = np.linspace(x_min, x_max, nbins + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    # Every entry counts once, so no explicit weights are needed
    ax.hist(var, bins, histtype="step", label="all")
    if is_logx:
        # Log-spaced bins are only readable on a log axis
        ax.set_xscale('log')
    if is_logy:
        # Axes objects expose set_yscale(); there is no Axes.yscale()
        ax.set_yscale('log')
    ax.set_xlabel(title_x)
    ax.set_ylabel(title_y)
    ax.legend()
    ax.text(1.0, 1.02, label_top, transform=ax.transAxes, fontsize=14, fontweight='bold', ha='right')
    plt.show()

def abs_comp_total_pe_two_mc_stack(
    df_mc, df_mc_low_th, df_data_bnb_light, df_data_offbeam_light,
    col_mc, col_data, title_x, x_min, x_max, nbins,
    title_y = "", is_logx = False, is_logy = False, label_top = "",
    mc_scale = 1.533, low_th_mc_scale = 1., intime_scale = 1.
):
    """
    Compare a data distribution with a stacked, absolutely normalized prediction.

    The prediction is the sum of three binned components, each multiplied by
    its own scale: MC (BNB+cosmic), low-threshold MC, and off-beam data.
    The on-beam data are drawn as an unscaled black step histogram, the three
    components as stacked translucent bars, and their sum as a dashed red line.

    Parameters:
        df_mc, df_mc_low_th (pd.DataFrame): MC frames; column col_mc is histogrammed.
        df_data_bnb_light, df_data_offbeam_light (pd.DataFrame): Data frames;
            column col_data is histogrammed.
        col_mc, col_data: Column keys for MC and data, respectively.
        title_x (str): x-axis label.
        x_min, x_max (float): Histogram range (must be > 0 when is_logx is True).
        nbins (int): Number of bins.
        title_y (str): y-axis label; "Events" is used when empty.
        is_logx, is_logy (bool): Logarithmic binning/x axis and logarithmic y axis.
        label_top (str): Text written above the top-right corner (skipped when empty).
        mc_scale (float): Multiplier for the MC (BNB+cosmic) bin contents.
        low_th_mc_scale (float): Multiplier for the low-threshold MC bin contents.
        intime_scale (float): Multiplier for the off-beam data bin contents.

    Returns:
        None: The figure is shown with plt.show().
    """
    mc_var = df_mc[col_mc]
    mc_low_th_var = df_mc_low_th[col_mc]
    data_bnb_light_var = df_data_bnb_light[col_data]
    data_offbeam_light_var = df_data_offbeam_light[col_data]

    # Binning
    bins = (np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1)
            if is_logx else np.linspace(x_min, x_max, nbins + 1))
    bin_centers = (np.sqrt(bins[:-1] * bins[1:])
                   if is_logx else 0.5 * (bins[:-1] + bins[1:]))
    bin_widths = np.diff(bins)

    fig, ax = plt.subplots(figsize=(8, 5))

    # --- Data histogram (BNB light) ---
    ax.hist(
        data_bnb_light_var, bins=bins,
        histtype="step", color="black", linewidth=2,
        label="BNB + light data"
    )

    # --- Components: binned counts multiplied by their scales ---
    mc_hist = np.histogram(mc_var, bins=bins)[0] * mc_scale
    mc_low_th_hist = np.histogram(mc_low_th_var, bins=bins)[0] * low_th_mc_scale
    offbeam_hist = np.histogram(data_offbeam_light_var, bins=bins)[0] * intime_scale

    # --- Stack components as bars (to show contributions) ---
    # Use bars so stack works in both linear and log-x; they also respect log-y.
    # (We avoid specifying colors explicitly so you can theme globally if you like.)
    ax.bar(bins[:-1], mc_hist, width=bin_widths, align="edge",
           alpha=0.45, edgecolor="none", label="MC (BNB+cosmic)")
    ax.bar(bins[:-1], mc_low_th_hist, width=bin_widths, align="edge",
           bottom=mc_hist, alpha=0.45, edgecolor="none", label="MC low-th")
    ax.bar(bins[:-1], offbeam_hist, width=bin_widths, align="edge",
           bottom=mc_hist + mc_low_th_hist, alpha=0.45, edgecolor="none", label="Off-beam light")

    # --- Total prediction overlay (dashed step) ---
    pred_hist = mc_hist + mc_low_th_hist + offbeam_hist
    ax.step(bin_centers, pred_hist, where="mid", linewidth=2, linestyle="--",
            label="Total prediction", color='red')

    # Scales, labels, cosmetics
    if is_logx:
        ax.set_xscale("log")
    if is_logy:
        ax.set_yscale("log")

    ax.set_xlabel(title_x)
    ax.set_ylabel(title_y if title_y else "Events")
    ax.set_xlim(x_min, x_max)

    # Legend: put data first, then components, then total
    # (Matplotlib collects in order added; this already matches that.)
    ax.legend(frameon=False)

    if label_top:
        ax.text(1.0, 1.02, label_top, transform=ax.transAxes,
                fontsize=14, fontweight='bold', ha='right')

    plt.tight_layout()
    plt.show()
    

In [None]:
mc_bnb_cosmic_opflash_df = mc_bnb_cosmic_dfs['opflash']
mc_rockbox_th1to100_df = mc_rockbox_th1to100_dfs['opflash']
data_bnb_light_opflash_df = data_bnb_light_dfs['opflash']
data_offbeam_light_opflash_df = data_offbeam_light_dfs['opflash']

In [None]:
## Check OpFlash First Time distributions
draw_a_distribution(mc_bnb_cosmic_opflash_df, ('firsttime'), "First Time [us]", -15., 25., 100, "OpFlashes", False, False, "MC BNB + Cosmic")
draw_a_distribution(mc_rockbox_th1to100_df, ('firsttime'), "First Time [us]", -15., 25., 100, "OpFlashes", False, False, "MC BNB + Cosmic, Rockbox Th. 1 - 100 MeV")
draw_a_distribution(data_offbeam_light_opflash_df, ('firsttime'), "First Time [us]", -15., 25., 100, "OpFlashes", False, False, "Data Off-beam + Light")
draw_a_distribution(data_bnb_light_opflash_df, ('firsttime'), "First Time [us]", -15., 25., 100, "OpFlashes", False, False, "Data BNB + Light")

In [None]:
## Based on the previous block's result, we use [-5 us, +5 us] window for collecting OpFlashes corresponding to the trigger

# Both window edges are exclusive (strict > and <).
# Note the asymmetry in naming: the two MC selections get new "*_time_cut_df" names,
# while the two data frames below are overwritten in place with their time-cut version.
mc_bnb_light_opflash_time_cut_df = mc_bnb_cosmic_opflash_df[(mc_bnb_cosmic_opflash_df.firsttime > -5.) & (mc_bnb_cosmic_opflash_df.firsttime < 5.)]
mc_rockbox_th1to100_opflash_time_cut_df = mc_rockbox_th1to100_df[(mc_rockbox_th1to100_df.firsttime > -5.) & (mc_rockbox_th1to100_df.firsttime < 5.)]
data_offbeam_light_opflash_df = data_offbeam_light_opflash_df[(data_offbeam_light_opflash_df.firsttime > -5.) & (data_offbeam_light_opflash_df.firsttime < 5.)]
data_bnb_light_opflash_df = data_bnb_light_opflash_df[(data_bnb_light_opflash_df.firsttime > -5.) & (data_bnb_light_opflash_df.firsttime < 5.)]

In [None]:
## Sum PEs for OpFlashes within the timing window

# Group by the first two index levels and add up 'totalpe' of all flashes that
# passed the time cut; groups with no surviving flash are simply absent from the result.
mc_bnb_cosmic_totalpe_sum_time_cut=(mc_bnb_light_opflash_time_cut_df.groupby(level=[0, 1])["totalpe"].sum())
mc_rockbox_th1to100_totalpe_sum_time_cut=(mc_rockbox_th1to100_opflash_time_cut_df.groupby(level=[0, 1])["totalpe"].sum())
data_offbeam_light_totalpe_sum_time_cut=(data_offbeam_light_opflash_df.groupby(level=[0, 1])["totalpe"].sum())
data_bnb_light_totalpe_sum_time_cut=(data_bnb_light_opflash_df.groupby(level=[0, 1])["totalpe"].sum())

In [None]:
## Add the total PE column to the hdr DataFrames, and fill empty (NaN) entries with -1.
# Assignment aligns on the index, so hdr rows without any flash inside the time
# window come out as NaN; they are then set to the sentinel value -1
# (not 0), which keeps them separable from genuine sums in the plots below.

mc_bnb_cosmic_dfs['hdr']['totalpe_timecut'] = mc_bnb_cosmic_totalpe_sum_time_cut
mc_bnb_cosmic_dfs['hdr']['totalpe_timecut'] = (mc_bnb_cosmic_dfs['hdr']['totalpe_timecut'].fillna(-1.))

mc_rockbox_th1to100_dfs['hdr']['totalpe_timecut'] = mc_rockbox_th1to100_totalpe_sum_time_cut
mc_rockbox_th1to100_dfs['hdr']['totalpe_timecut'] = (mc_rockbox_th1to100_dfs['hdr']['totalpe_timecut'].fillna(-1.))

data_offbeam_light_dfs['hdr']['totalpe_timecut'] = data_offbeam_light_totalpe_sum_time_cut
data_offbeam_light_dfs['hdr']['totalpe_timecut'] = (data_offbeam_light_dfs['hdr']['totalpe_timecut'].fillna(-1.))

data_bnb_light_dfs['hdr']['totalpe_timecut'] = data_bnb_light_totalpe_sum_time_cut
data_bnb_light_dfs['hdr']['totalpe_timecut'] = (data_bnb_light_dfs['hdr']['totalpe_timecut'].fillna(-1.))

In [None]:
## Check the total PE distributions

draw_a_distribution(mc_bnb_cosmic_dfs['hdr'], ('totalpe_timecut'), "Total PE", -10, 50000, 1000, "Entries", False, False, "MC BNB + Cosmic")
draw_a_distribution(mc_rockbox_th1to100_dfs['hdr'], ('totalpe_timecut'), "Total PE", -10, 50000, 1000, "Entries", False, False, "MC BNB + Cosmic, Rockbox Th. 1 - 100 MeV")
draw_a_distribution(data_offbeam_light_dfs['hdr'], ('totalpe_timecut'), "Total PE", -10, 50000, 1000, "Entries", False, False, "Data Off-beam + Light")
draw_a_distribution(data_bnb_light_dfs['hdr'], ('totalpe_timecut'), "Total PE", -10, 50000, 1000, "Entries", False, False, "Data BNB + Light")

### Apply PE scale correction to MC and collect PE cut decision

In [None]:
## Apply a scale to MC total PE
# The *_hdr_df names are aliases of the hdr frames (no copy), so the new column
# is also added to mc_bnb_cosmic_dfs['hdr'] / mc_rockbox_th1to100_dfs['hdr'].
# Rows carrying the "no flash" sentinel -1 become -0.66 after scaling.
mc_bnb_cosmic_hdr_df = mc_bnb_cosmic_dfs['hdr']
mc_bnb_cosmic_hdr_df['totalpe_timecut_0p66'] = mc_bnb_cosmic_hdr_df['totalpe_timecut'] * 0.66

mc_rockbox_th1to100_hdr_df = mc_rockbox_th1to100_dfs['hdr']
mc_rockbox_th1to100_hdr_df['totalpe_timecut_0p66'] = mc_rockbox_th1to100_hdr_df['totalpe_timecut'] * 0.66

# Data frames are used unscaled
data_offbeam_light_hdr_df = data_offbeam_light_dfs['hdr']
data_bnb_light_hdr_df = data_bnb_light_dfs['hdr']

In [None]:
## Compare the default and scaled total PE columns
mc_bnb_cosmic_hdr_df[["totalpe_timecut", "totalpe_timecut_0p66"]]

In [None]:
## Check total PE distribution
abs_comp_total_pe_two_mc_stack(mc_bnb_cosmic_hdr_df, mc_rockbox_th1to100_hdr_df, data_bnb_light_hdr_df, data_offbeam_light_hdr_df, ('totalpe_timecut_0p66'), ('totalpe_timecut'), "Total PE", -10., 40000, 100, "Entries", False, False, "MC PE scale 0.66", mc_pot_scale, mc_low_th_scale, intime_gate_scale)
abs_comp_total_pe_two_mc_stack(mc_bnb_cosmic_hdr_df, mc_rockbox_th1to100_hdr_df, data_bnb_light_hdr_df, data_offbeam_light_hdr_df, ('totalpe_timecut_0p66'), ('totalpe_timecut'), "Total PE", -100, 5000, 100, "Entries", False, False, "MC PE scale 0.66", mc_pot_scale, mc_low_th_scale, intime_gate_scale)

In [None]:
## DEFINE PE CUT
# One boolean mask per sample, indexed like the corresponding hdr frame.
# MC uses the 0.66-scaled total PE, data the unscaled total PE.
# Rows with the "no flash" sentinel (negative total PE) always fail the cut.
total_pe_cut = 2000.
mc_bnb_cosmic_pe_mask = mc_bnb_cosmic_hdr_df["totalpe_timecut_0p66"] > total_pe_cut
mc_low_th_pe_mask = mc_rockbox_th1to100_hdr_df['totalpe_timecut_0p66'] > total_pe_cut
data_offbeam_pe_mask = data_offbeam_light_hdr_df['totalpe_timecut'] > total_pe_cut
data_bnb_light_pe_mask = data_bnb_light_hdr_df['totalpe_timecut'] > total_pe_cut

In [None]:
## The evt DataFrame has more index levels than the hdr DataFrame, so the hdr-level mask has to be expanded to match the evt rows
def get_evt_pe_mask(mask, df):
    """
    Expand an event-level boolean mask to the rows of a deeper-indexed frame.

    Parameters:
        mask (pd.Series): Boolean Series; the index entries where it is True
                          are the selected events.
        df (pd.DataFrame): Frame whose index is the event index followed by two
                           further levels (the last two levels are dropped for
                           the comparison).

    Returns:
        np.ndarray: Boolean array with one entry per row of df, True where the
                    row's leading index levels belong to a selected event.
    """
    # Strip the two innermost levels so each row is labelled by its event key only
    event_keys = df.index.droplevel([-2, -1])
    # Index entries of the events that pass the mask
    passing_events = mask[mask].index
    return event_keys.isin(passing_events)

In [None]:
mc_bnb_cosmic_evt_pe_mask = get_evt_pe_mask(mc_bnb_cosmic_pe_mask, mc_bnb_cosmic_dfs["evt"])
mc_low_th_evt_pe_mask = get_evt_pe_mask(mc_low_th_pe_mask, mc_rockbox_th1to100_dfs["evt"])
data_offbeam_evt_pe_mask = get_evt_pe_mask(data_offbeam_pe_mask, data_offbeam_light_dfs["evt"])
data_bnb_light_evt_pe_mask = get_evt_pe_mask(data_bnb_light_pe_mask, data_bnb_light_dfs["evt"])

In [None]:
## Double-check that the evt DataFrames and the mask arrays have the same lengths
print("mc_bnb_cosmic_dfs['evt'] length: %d" %len(mc_bnb_cosmic_dfs['evt']))
print("mc_bnb_cosmic_evt_pe_mask length: %d" %len(mc_bnb_cosmic_evt_pe_mask))

print("mc_rockbox_th1to100_dfs['evt'] length: %d" %len(mc_rockbox_th1to100_dfs['evt']))
print("mc_low_th_evt_pe_mask length: %d" %len(mc_low_th_evt_pe_mask))

print("data_offbeam_light_dfs['evt'] length: %d" %len(data_offbeam_light_dfs['evt']))
print("data_offbeam_evt_pe_mask length: %d" %len(data_offbeam_evt_pe_mask))

print("data_bnb_light_dfs['evt'] length: %d" %len(data_bnb_light_dfs['evt']))
print("data_bnb_light_evt_pe_mask length: %d" %len(data_bnb_light_evt_pe_mask))

In [None]:
## Add PE cut decision (boolean) column to evt DataFrame
# The evt frames use 6-level column keys, so the new column name is padded with
# empty strings to the same depth. The masks are positional arrays with one
# entry per evt row (see the length check above).
mc_bnb_cosmic_dfs["evt"][('pemask', '', '', '', '', '')] = mc_bnb_cosmic_evt_pe_mask
mc_rockbox_th1to100_dfs["evt"][('pemask', '', '', '', '', '')] = mc_low_th_evt_pe_mask
data_bnb_light_dfs["evt"][('pemask', '', '', '', '', '')] = data_bnb_light_evt_pe_mask
data_offbeam_light_dfs["evt"][('pemask', '', '', '', '', '')] = data_offbeam_evt_pe_mask

## Define Fiducial Volumes

In [None]:
def InFV_trk(data): # cm
    """
    Box-shaped containment test: -190 < x < 190, -190 < y < 190, 10 < z < 450.

    All boundaries are exclusive. `data` must expose `.x`, `.y` and `.z`;
    the result has the same shape as those attributes.
    """
    x_lo, x_hi = -190., 190.
    y_lo, y_hi = -190., 190.
    z_lo, z_hi = 10., 450.
    inside_x = (data.x > x_lo) & (data.x < x_hi)
    inside_y = (data.y > y_lo) & (data.y < y_hi)
    inside_z = (data.z > z_lo) & (data.z < z_hi)
    return inside_x & inside_y & inside_z

def InFV(data): # cm
    """
    Containment test with a central gap in x:
    10 < |x| < 190, -190 < y < 190, 10 < z < 450 (all boundaries exclusive).

    `data` must expose `.x`, `.y` and `.z`; the result has the same shape as
    those attributes.
    """
    y_lo, y_hi = -190., 190.
    z_lo, z_hi = 10., 450.
    abs_x = np.abs(data.x)
    inside_x = (abs_x > 10) & (abs_x < 190)
    inside_y = (data.y > y_lo) & (data.y < y_hi)
    inside_z = (data.z > z_lo) & (data.z < z_hi)
    return inside_x & inside_y & inside_z

def InFV_xy(data): # cm
    """
    Transverse-only containment test: 10 < |x| < 190 and -190 < y < 190
    (all boundaries exclusive); z is not inspected.

    `data` must expose `.x` and `.y`.
    """
    abs_x_lo, abs_x_hi = 10., 190.
    y_lo, y_hi = -190., 190.
    abs_x = np.abs(data.x)
    inside_x = (abs_x > abs_x_lo) & (abs_x < abs_x_hi)
    inside_y = (data.y > y_lo) & (data.y < y_hi)
    return inside_x & inside_y

## Vetoing high y and high z region
def InFV_nohiyz(data):
    """
    Containment test that additionally vetoes the high-y corner at high z.

    Requirements (cm, boundaries exclusive unless noted):
      * 10 < |x| < 190 and 10 < z < 450
      * for z <  250: |y| < 190
      * for z >= 250: -190 < y < 100

    The point z == 250 is assigned to the high-z branch (the tighter y window),
    so it is no longer rejected regardless of y.

    `data` must expose `.x`, `.y` and `.z`; the result has the same shape as
    those attributes.
    """
    xmin = 10.
    xmax = 190.
    zmin = 10.
    zmax = 450.
    z_split = 250.      # boundary between the low-z and high-z y windows
    ymax_lowz = 190.
    ymin_highz = -190.
    ymax_highz = 100.
    abs_x = np.abs(data.x)
    pass_xz = (abs_x > xmin) & (abs_x < xmax) & (data.z > zmin) & (data.z < zmax)
    pass_y_lowz = (data.z < z_split) & (np.abs(data.y) < ymax_lowz)
    pass_y_highz = (data.z >= z_split) & (data.y > ymin_highz) & (data.y < ymax_highz)
    return pass_xz & (pass_y_lowz | pass_y_highz)

def InFV_nohiyz_trk(data):
    """
    Same high-y / high-z veto as InFV_nohiyz, but without the central gap in x.

    Requirements (cm, boundaries exclusive unless noted):
      * |x| < 190 and 10 < z < 450
      * for z <  250: |y| < 190
      * for z >= 250: -190 < y < 100

    The point z == 250 is assigned to the high-z branch (the tighter y window),
    so it is no longer rejected regardless of y.

    `data` must expose `.x`, `.y` and `.z`; the result has the same shape as
    those attributes.
    """
    xmax = 190.
    zmin = 10.
    zmax = 450.
    z_split = 250.      # boundary between the low-z and high-z y windows
    ymax_lowz = 190.
    ymin_highz = -190.
    ymax_highz = 100.
    pass_xz = (np.abs(data.x) < xmax) & (data.z > zmin) & (data.z < zmax)
    pass_y_lowz = (data.z < z_split) & (np.abs(data.y) < ymax_lowz)
    pass_y_highz = (data.z >= z_split) & (data.y > ymin_highz) & (data.y < ymax_highz)
    return pass_xz & (pass_y_lowz | pass_y_highz)


## Perform matching between evt and mcnu DataFrames. Assign nuint_categ

This is an example from numuCC coherent charged pion production.

In [None]:
def get_true_t(df):
    """
    Return |t|, the absolute value of
    (E - E_mu - E_cpi)^2 - |p - p_mu - p_cpi|^2,
    built from df.E, df.momentum and the generator-level energy/momentum of
    df.mu and df.cpi.

    The result has the same shape as the inputs; missing inputs propagate as NaN.
    """
    # Energy and momentum left over after removing the mu and cpi
    delta_e = df.E - df.mu.genE - df.cpi.genE
    delta_px = df.momentum.x - df.mu.genp.x - df.cpi.genp.x
    delta_py = df.momentum.y - df.mu.genp.y - df.cpi.genp.y
    delta_pz = df.momentum.z - df.mu.genp.z - df.cpi.genp.z
    t = delta_e**2 - delta_px**2 - delta_py**2 - delta_pz**2
    return np.abs(t)

In [None]:
# |t| for every true neutrino interaction of the BNB + cosmic MC sample.
# Rows where |t| cannot be computed (NaN) get the large sentinel value 999999.
true_t = get_true_t(mc_bnb_cosmic_dfs['mcnu']).fillna(999999)
mc_bnb_cosmic_dfs['mcnu']['true_t'] = true_t

In [None]:
## -- truth level flags
def Signal(df): # definition
    """
    Truth-level signal flag.

    An interaction is signal when its position passes InFV_nohiyz and its
    final state has exactly one muon (nmu_27MeV), exactly one charged pion
    (npi_30MeV), and no protons (np_20MeV), neutral pions (npi0) or neutrons
    (nn_0MeV). A cut on true_t (< 0.1) was considered but is not applied.
    """
    topology_cuts = (
        (df.nmu_27MeV == 1)
        & (df.npi_30MeV == 1)
        & (df.np_20MeV == 0)
        & (df.npi0 == 0)
        & (df.nn_0MeV == 0)
    )
    return InFV_nohiyz(df.position) & topology_cuts

def CCCOH(df):
    """
    Truth-level flag: charged-current interaction (df.iscc) with genie_mode == 3.
    """
    return df.iscc & (df.genie_mode == 3)

In [None]:
## For MC BNB + cosmic sample
is_fv = InFV_nohiyz(mc_bnb_cosmic_dfs['mcnu'].position)
is_signal = Signal(mc_bnb_cosmic_dfs['mcnu'])
is_cc = mc_bnb_cosmic_dfs['mcnu'].iscc
genie_mode = mc_bnb_cosmic_dfs['mcnu'].genie_mode
w = mc_bnb_cosmic_dfs['mcnu'].w

# Every interaction starts in the fallback category 8 and is re-labelled below.
# The Series is built without a try/except on purpose: if construction fails the
# cell must stop here instead of continuing with an undefined (or stale, from an
# earlier run) nuint_categ.
# NOTE(review): CC interactions in the FV that match none of the masks below
# (e.g. 1.4 <= w <= 2.0 with genie_mode not in {0, 1, 2, 3, 10}) keep the value 8 —
# confirm that downstream plotting handles this category.
nuint_categ = pd.Series(8, index=mc_bnb_cosmic_dfs['mcnu'].index)

# Assignments are applied in order, so where masks overlap the later line wins
# (e.g. genie_mode == 1 with w > 2.0 ends up as DIS).
nuint_categ[~is_fv] = -1  # Out of FV
nuint_categ[is_fv & is_signal] = 1 # Signal
nuint_categ[is_fv & ~is_cc & ~is_signal] = 0  # NC
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 3)] = 2  # Non-signal CCCOH
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 0)] = 3  # CCQE
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 10)] = 4  # 2p2h
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode != 0) & (genie_mode != 3) & (genie_mode != 10) & ((w < 1.4) | (genie_mode == 1))] = 5  # RES
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode != 0) & (genie_mode != 3) & (genie_mode != 10) & ((w > 2.0) | (genie_mode == 2))] = 6  # DIS

mc_bnb_cosmic_dfs['mcnu']['nuint_categ'] = nuint_categ

In [None]:
## For MC Low Th. BNB + cosmic sample
is_fv = InFV_nohiyz(mc_rockbox_th1to100_dfs['mcnu'].position)
is_signal = Signal(mc_rockbox_th1to100_dfs['mcnu'])
is_cc = mc_rockbox_th1to100_dfs['mcnu'].iscc
genie_mode = mc_rockbox_th1to100_dfs['mcnu'].genie_mode
w = mc_rockbox_th1to100_dfs['mcnu'].w

# Every interaction starts in the fallback category 8 and is re-labelled below.
# The Series is built without a try/except on purpose: if construction fails the
# cell must stop here instead of silently re-using whatever nuint_categ is still
# in the kernel namespace (which would carry the index of a different sample).
# NOTE(review): CC interactions in the FV that match none of the masks below
# (e.g. 1.4 <= w <= 2.0 with genie_mode not in {0, 1, 2, 3, 10}) keep the value 8 —
# confirm that downstream plotting handles this category.
nuint_categ = pd.Series(8, index=mc_rockbox_th1to100_dfs['mcnu'].index)

# Assignments are applied in order, so where masks overlap the later line wins
# (e.g. genie_mode == 1 with w > 2.0 ends up as DIS).
nuint_categ[~is_fv] = -1  # Out of FV
nuint_categ[is_fv & is_signal] = 1 # Signal
nuint_categ[is_fv & ~is_cc & ~is_signal] = 0  # NC
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 3)] = 2  # Non-signal CCCOH
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 0)] = 3  # CCQE
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode == 10)] = 4  # 2p2h
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode != 0) & (genie_mode != 3) & (genie_mode != 10) & ((w < 1.4) | (genie_mode == 1))] = 5  # RES
nuint_categ[is_fv & is_cc & ~is_signal & (genie_mode != 0) & (genie_mode != 3) & (genie_mode != 10) & ((w > 2.0) | (genie_mode == 2))] = 6  # DIS

mc_rockbox_th1to100_dfs['mcnu']['nuint_categ'] = nuint_categ

In [None]:
# match for mc bnb cosmic
# NOTE(review): this cell is not idempotent — it rewrites the mcnu column labels in
# place (prefixing 'gen') and replaces the evt frame with the merged one, so running
# it a second time without reloading would prefix 'gen' again.
# Step 1: prefix every mcnu column with 'gen' so truth columns are distinguishable after the merge.
mc_bnb_cosmic_dfs["mcnu"].columns = pd.MultiIndex.from_tuples([('gen',) + col if isinstance(col, tuple) else ('gen', col) for col in mc_bnb_cosmic_dfs["mcnu"].columns])
# Step 2: pad the column tuples with '' up to 6 levels, the depth used by the evt columns.
mc_bnb_cosmic_dfs["mcnu"].columns = pd.MultiIndex.from_tuples([
    col + ('',) * (6 - len(col)) for col in mc_bnb_cosmic_dfs["mcnu"].columns
])
# Step 3: left-merge on (__ntuple, entry, truth-match index) so every evt row is kept,
# with NaN in the 'gen' columns where no matching mcnu row exists.
mc_bnb_cosmic_dfs["evt"] = ph.multicol_merge(mc_bnb_cosmic_dfs["evt"].reset_index(), mc_bnb_cosmic_dfs["mcnu"].reset_index(),
                                            left_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('slc','tmatch', 'idx', '', '', '')],
                                            right_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('rec.mc.nu..index', '','', '', '', '')], 
                                            how="left") ## -- save all slices
# Step 4: restore the 4-level evt index; verify_integrity=True raises if the merge duplicated any row.
mc_bnb_cosmic_dfs["evt"] = mc_bnb_cosmic_dfs["evt"].set_index(["__ntuple", "entry", "rec.slc..index", "rec.slc.reco.pfp..index"], verify_integrity=True)
# Step 5: rows without a matched true neutrino get category -2.
mc_bnb_cosmic_dfs["evt"].loc[mc_bnb_cosmic_dfs["evt"][('gen', 'nuint_categ', '', '', '', '')].isna(), [('gen', 'nuint_categ', '', '', '', '')]] = -2

# Same five steps for the low-threshold rockbox MC sample
mc_rockbox_th1to100_dfs["mcnu"].columns = pd.MultiIndex.from_tuples([('gen',) + col if isinstance(col, tuple) else ('gen', col) for col in mc_rockbox_th1to100_dfs["mcnu"].columns])
mc_rockbox_th1to100_dfs["mcnu"].columns = pd.MultiIndex.from_tuples([
    col + ('',) * (6 - len(col)) for col in mc_rockbox_th1to100_dfs["mcnu"].columns
])
mc_rockbox_th1to100_dfs["evt"] = ph.multicol_merge(mc_rockbox_th1to100_dfs["evt"].reset_index(), mc_rockbox_th1to100_dfs["mcnu"].reset_index(),
                            left_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('slc','tmatch', 'idx', '', '', '')],
                            right_on=[('__ntuple', '', '', '', '', ''), ('entry', '', '', '', '', ''), ('rec.mc.nu..index', '','', '', '', '')], 
                            how="left") ## -- save all slices
mc_rockbox_th1to100_dfs["evt"] = mc_rockbox_th1to100_dfs["evt"].set_index(["__ntuple", "entry", "rec.slc..index", "rec.slc.reco.pfp..index"], verify_integrity=True)
mc_rockbox_th1to100_dfs["evt"].loc[mc_rockbox_th1to100_dfs["evt"][('gen', 'nuint_categ', '', '', '', '')].isna(), [('gen', 'nuint_categ', '', '', '', '')]] = -2
# The whole low-threshold sample is then assigned category -4, overriding the merged
# per-interaction categories and the -2 fill on the previous line.
# NOTE(review): presumably intentional so the sample is drawn as one component — confirm.
mc_rockbox_th1to100_dfs["evt"][('gen', 'nuint_categ', '', '', '', '')] = -4

# Off-beam data carries no truth information; all rows get category -3.
data_offbeam_light_dfs["evt"][('gen', 'nuint_categ', '', '', '', '')] = -3

In [None]:
## Check 
mc_bnb_cosmic_dfs["mcnu"].gen.nuint_categ.value_counts()

## Data vs MC plots for TPC variables

### Define plotting functions

In [None]:
## Note: mode_list and mode_labels are based on the truth-level event categories defined in the previous section

# The three lists are parallel: entry i of mode_list is the nuint_categ value,
# entry i of mode_labels its legend text and entry i of colors its color.
# The fallback nuint_categ value 8 has no entry here.
mode_list = [1, 2, 0, 3, 4, 5, 6, -1, -2, -3, -4]
mode_labels = ["Signal", "Non-Sig. CCCOH", "NC", "QE", "2p2h", "RES", "DIS", "Non-FV", "Others", "Intime Cosmics", "Low Th. Rockbox"]
colors = ['#d62728',  # Red            
          '#1f77b4',  # Blue
          '#ff7f0e',  # Orange
          '#2ca02c',  # Green
          '#17becf',  # Teal
          '#9467bd',  # Purple
          '#8c564b',  # Brown
          '#e377c2',  # Pink
          '#7f7f7f',  # Gray
          '#bcbd22',  # Yellow-green
          '#FFD700']  # Gold

def draw_reco_stacked_hist(var_mc, var_mc_low_th, var_offbeam_data, is_logx, is_logy,
                           title_x, title_y, x_min, x_max, nbins, outname,
                           data_overlay=False, var_data=None, draw_density=False):
    """Draw a stacked MC histogram with an optional data overlay and a Data/MC ratio pad.

    The three input lists are concatenated in the given order and drawn as a
    single stack on the upper pad. Their entries are weighted by the
    notebook-level scale factors ``mc_pot_scale``, ``mc_low_th_scale`` and
    ``intime_gate_scale`` respectively. The lower pad shows the relative MC
    statistical band around 1 and, when ``data_overlay`` is set, the Data/MC
    ratio. The figure is shown and closed; nothing is returned.

    Besides the scale factors, the function reads the notebook globals
    ``colors``, ``mode_labels``, ``target_pot`` and ``sample_str`` and calls
    ``sh.return_data_stat_err``.

    Parameters
    ----------
    var_mc, var_mc_low_th, var_offbeam_data : list of array-like
        Values to histogram, one array per stacked component.
        NOTE(review): the combined number of arrays presumably has to equal
        len(colors) and len(mode_labels) — confirm against the callers.
    is_logx, is_logy : bool
        Use a logarithmic x axis (with log-spaced bins) / y axis.
    title_x, title_y : str
        Axis titles. ``title_y`` is replaced when ``data_overlay`` is set.
    x_min, x_max : float
        Histogram range. ``x_min`` must be positive when ``is_logx`` is set.
    nbins : int
        Number of bins.
    outname : str
        Output file stem. Currently unused because the ``savefig`` call is
        commented out.
    data_overlay : bool, optional
        Draw ``var_data`` as points with error bars and fill the ratio pad.
    var_data : array-like, optional
        Data values; ``None`` is treated as an empty sample.
    draw_density : bool, optional
        Normalise MC and data to unit area instead of drawing event counts.
    """
    if var_data is None:
        var_data = []

    ## Define the output figure to have two pads
    fig = plt.figure(figsize=(8, 8), dpi=100)
    gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1], hspace=0.10)
    ax_main = fig.add_subplot(gs[0])
    ax_ratio = fig.add_subplot(gs[1], sharex=ax_main)

    if is_logx:
        ax_main.set_xscale('log')
        ax_ratio.set_xscale('log')
    if is_logy:
        ax_main.set_yscale('log')

    ax_main.set_xlabel("")  # Only bottom has x-label
    ax_main.set_ylabel(title_y)

    ax_ratio.axhline(1.0, color='red', linestyle='--', linewidth=1)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize=12)
    ax_ratio.set_ylim(0.5, 1.5)
    ax_ratio.tick_params(width=2, length=6)
    for spine in ax_ratio.spines.values():
        spine.set_linewidth(2)

    plt.setp(ax_main.get_xticklabels(), visible=False)

    ## Define binning (geometric bin centres on a log axis)
    if is_logx:
        bins = np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1)
        bin_centers = np.sqrt(bins[:-1] * bins[1:])
    else:
        bins = np.linspace(x_min, x_max, nbins + 1)
        bin_centers = 0.5 * (bins[:-1] + bins[1:])

    ## Define data for MC: one constant weight per entry, set by the sample the entry comes from
    all_mc_data = var_mc + var_mc_low_th + var_offbeam_data
    all_weights = (
        [np.ones_like(data) * mc_pot_scale for data in var_mc] +
        [np.ones_like(data) * mc_low_th_scale for data in var_mc_low_th] +
        [np.ones_like(data) * intime_gate_scale for data in var_offbeam_data]
    )

    ## Plot stacked MC
    hist_data, bins, _ = ax_main.hist(all_mc_data,
                                      bins=bins,
                                      weights=all_weights,
                                      stacked=True,
                                      color=colors,
                                      label=mode_labels,
                                      edgecolor='none',
                                      linewidth=0,
                                      density=draw_density,
                                      histtype='stepfilled')
    bin_widths = np.diff(bins)

    ## Per-bin MC yield and sum of squared weights (for the statistical error)
    each_mc_hist_data = []
    each_mc_hist_err2 = []
    for data, w in zip(all_mc_data, all_weights):
        hist_vals, _ = np.histogram(data, bins=bins, weights=w)
        hist_err2, _ = np.histogram(data, bins=bins, weights=np.square(w))
        each_mc_hist_data.append(hist_vals)
        each_mc_hist_err2.append(hist_err2)

    total_mc = np.sum(each_mc_hist_data, axis=0)
    mc_stat_err = np.sqrt(np.sum(each_mc_hist_err2, axis=0))

    if draw_density:
        # A stacked density histogram is normalised so that the whole stack has
        # unit area; apply the same factor to the total and its error so the
        # uncertainty band, the y range and the ratio match what is drawn.
        mc_integral = np.sum(total_mc)
        if mc_integral > 0:
            total_mc = total_mc / (mc_integral * bin_widths)
            mc_stat_err = mc_stat_err / (mc_integral * bin_widths)

    max_y = np.max(total_mc)

    ## Plot MC stat error box
    ax_main.bar(
        bin_centers,
        2 * mc_stat_err,
        width=bin_widths,
        bottom=total_mc - mc_stat_err,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ax_main.tick_params(width=2, length=10)
    for spine in ax_main.spines.values():
        spine.set_linewidth(2)

    ## Draw Ratio error bar
    # Empty MC bins give 0/0: no band is drawn there and the band centre is
    # pushed far below the visible ratio range.
    mc_stat_err_ratio = np.nan_to_num(mc_stat_err / total_mc, nan=0.)
    mc_content_ratio = np.nan_to_num(total_mc / total_mc, nan=-999.)
    ax_ratio.bar(
        bin_centers,
        2 * mc_stat_err_ratio,
        width=bin_widths,
        bottom=mc_content_ratio - mc_stat_err_ratio,
        facecolor='none',             # transparent fill
        edgecolor='black',            # outline color of the hatching
        hatch='xxxx',                 # hatch pattern similar to ROOT's 3004
        linewidth=0.0,
        label='MC Stat. Unc.'
    )

    ## Draw data too
    if data_overlay:
        ax_main.set_ylabel("Events (POT = %.2e)" % target_pot)
        if draw_density:
            ax_main.set_ylabel("A.U.")

        ## Define data histogram
        counts, _ = np.histogram(var_data, bins=bins)
        total_data = np.sum(counts)
        norm_counts = counts
        data_eylow, data_eyhigh = sh.return_data_stat_err(counts)

        if draw_density:
            if total_data > 0:
                data_norm = total_data * bin_widths
                norm_counts = counts / data_norm
                data_eylow = data_eylow / data_norm
                data_eyhigh = data_eyhigh / data_norm
            else:
                # No data in range: draw zeros instead of dividing by zero
                norm_counts = np.zeros_like(counts, dtype=float)
                data_eylow = np.zeros_like(counts, dtype=float)
                data_eyhigh = np.zeros_like(counts, dtype=float)

        ## Plot data points on main histogram
        ax_main.errorbar(bin_centers, norm_counts,
                         yerr=np.vstack((data_eylow, data_eyhigh)),
                         fmt='o', color='black', label='Data',
                         markersize=5, capsize=3, linewidth=1.5)

        max_y_data = np.max(norm_counts + data_eyhigh)
        max_y = max(max_y, max_y_data)

        ## Make data/mc ratio plot (0/0 bins are moved out of the visible range)
        data_ratio = np.nan_to_num(norm_counts / total_mc, nan=-999.)
        data_ratio_eylow = np.nan_to_num(data_eylow / total_mc, nan=0.)
        data_ratio_eyhigh = np.nan_to_num(data_eyhigh / total_mc, nan=0.)

        ax_ratio.errorbar(bin_centers, data_ratio,
                          yerr=np.vstack((data_ratio_eylow, data_ratio_eyhigh)),
                          fmt='o', color='black', label='Data',
                          markersize=5, capsize=3, linewidth=1.5)

    ## Set ax_main axes variables
    ax_main.set_xlim(x_min, x_max)
    ax_main.set_ylim(0.0, max_y * 1.5)
    if is_logy:
        ax_main.set_ylim(0.1, max_y * 600)

    # Legend with fractions: hist_data holds the cumulative stack, so the yield
    # of each component is the difference between consecutive layers.
    accum_sum = [0.] + [np.sum(layer) for layer in hist_data]
    total_sum = accum_sum[-1]
    individual_sums = [upper - lower for lower, upper in zip(accum_sum[:-1], accum_sum[1:])]
    fractions = [(count / total_sum) * 100 for count in individual_sums]
    # Labels are passed without handles, so matplotlib pairs them with the
    # labelled artists by order.
    # NOTE(review): the reversed label order presumably mirrors the order in
    # which the stepfilled stack is added to the axes — re-check the legend
    # colours after a matplotlib upgrade.
    legend_labels = [f"{label} ({frac:.1f}%)" for label, frac in zip(mode_labels[::-1], fractions[::-1])]
    if data_overlay:
        # The MC uncertainty band comes before the data points in the legend,
        # so it must always get its own label; appending only "Data" would
        # attach that label to the band.
        if draw_density:
            legend_labels.append("MC Stat. Unc.")
            legend_labels.append("Data")
        else:
            legend_labels.append(f"Total MC Stat. Unc. ({total_sum:.0f})")
            legend_labels.append(f"Data ({total_data:.0f})")
    ax_main.legend(legend_labels, loc='upper left', fontsize=8, frameon=False, ncol=3, bbox_to_anchor=(0.05, 0.98))

    legend_labels_ratio = ["y=1", "MC (Stat. Only)", "Data/MC"]
    ax_ratio.legend(legend_labels_ratio, loc='upper left', fontsize=6, frameon=False, ncol=3, bbox_to_anchor=(0.05, 0.98))

    ax_main.text(0.00, 1.02, "SBND " + sample_str + ", Preliminary",
                 transform=ax_main.transAxes, fontsize=14, fontweight='bold')

    #fig.savefig("./plots/pandora_df/2025_v10_06_00_05/data_vs_mc_tpc/" + outname + ".pdf", format='pdf', bbox_inches='tight')
    plt.show()
    plt.close()

def draw_mc_data_shape_comp_per_slc(mc_bnb_cosmic_df, mc_low_th_df, data_offbeam_df, data_df, column, x_title, y_title, x_min, x_max, n_bins, out_name, is_logx = False, is_logy = False):
    """Reduce the four samples to one value of ``column`` per slice and draw the data/MC comparison.

    Each frame is grouped by (__ntuple, entry, rec.slc..index) and the first
    entry of every group is kept. The BNB+cosmic MC is split by ``nuint_categ``
    following ``mode_list`` (without -3 and -4); the low-threshold MC
    contributes its -4 rows and the off-beam data its -3 rows. The result is
    handed to ``draw_reco_stacked_hist`` with the data overlay switched on.
    """
    categ_key = ('gen', 'nuint_categ', '', '', '', '')
    slice_keys = ['__ntuple', 'entry', 'rec.slc..index']

    def first_per_slice(frame, wanted):
        # one row per slice: first entry of every requested column
        return frame.groupby(slice_keys)[wanted].first()

    bnb_slices = first_per_slice(mc_bnb_cosmic_df, [column, categ_key])
    low_th_slices = first_per_slice(mc_low_th_df, [column, categ_key])
    offbeam_slices = first_per_slice(data_offbeam_df, [column, categ_key])
    data_slices = first_per_slice(data_df, [column])

    # one array per BNB+cosmic category, in mode_list order
    stacked_mc = []
    for mode in mode_list:
        if mode == -3 or mode == -4:
            continue  # these categories come from the dedicated samples below
        stacked_mc.append(bnb_slices[bnb_slices[categ_key] == mode][column])

    low_th_values = [low_th_slices[low_th_slices.gen.nuint_categ == -4][column]]
    offbeam_values = [offbeam_slices[offbeam_slices.gen.nuint_categ == -3][column]]
    data_values = data_slices[column]

    draw_reco_stacked_hist(stacked_mc, low_th_values, offbeam_values, is_logx, is_logy,
                           x_title, y_title, x_min, x_max, n_bins, out_name,
                           True, data_values)

def draw_reco_valid_plots(mc_bnb_cosmic_df, mc_low_th_df, data_offbeam_df, data_df, suffix, is_logx = False, is_logy = False):
    """Draw the standard set of reco validation data/MC plots for one selection stage.

    Every plot is produced by ``draw_mc_data_shape_comp_per_slc`` with the four
    samples (BNB+cosmic MC, low-threshold MC, off-beam data, beam data), the
    y title "A.U." and an output name starting with ``suffix``. The sequence is:

    1. slice clear-cosmic flag,
    2. slice vertex x, y, z, then y and z separately for vertex x < 0 ("East")
       and vertex x > 0 ("West"),
    3. track score, chi2 PID, direction, length and start/end position of the
       longest and second-longest track.

    ``is_logx`` / ``is_logy`` are forwarded to the slice-level and track
    start/end plots; the other track plots use fixed axis scales.

    NOTE(review): the track ranking groups by the first two index levels only
    (presumably __ntuple and entry, i.e. per event rather than per slice) —
    confirm this is intended.
    """
    def _plot(frames, column, x_title, x_min, x_max, n_bins, tag, logx=False, logy=False):
        # One data/MC comparison for a consistent (mc, low-th mc, off-beam, data) set of frames
        mc_df, low_th_df, offbeam_df, beam_df = frames
        draw_mc_data_shape_comp_per_slc(mc_df, low_th_df, offbeam_df, beam_df, column, x_title, "A.U.",
                                        x_min, x_max, n_bins, suffix + tag, logx, logy)

    def _tpc_half(df, east):
        # Rows with slice vertex x < 0 (east=True) or x > 0 (east=False)
        vtx_x = df.slc.vertex.x
        return df[vtx_x < 0.] if east else df[vtx_x > 0.]

    trk_len_col = ('pfp', 'trk', 'len', '', '', '')

    def _longest_two(df):
        # Sort once by track length, then take the longest and second-longest row of each group
        ranked = df.sort_values(by=trk_len_col, ascending=False).groupby(level=[0, 1])
        return ranked.nth(0), ranked.nth(1)

    all_frames = (mc_bnb_cosmic_df, mc_low_th_df, data_offbeam_df, data_df)

    ## -- 1) Clear cosmic
    clear_cosmic_col = ('slc', 'is_clear_cosmic', '', '', '', '')
    _plot(all_frames, clear_cosmic_col, "Is Clear Cosmic", -0.5, 1.5, 2, "_slc_is_clear_cosmic", is_logx, is_logy)

    ## -- 2) vertex x,y,z
    vtx_x_col = ('slc', 'vertex', 'x', '', '', '')
    vtx_y_col = ('slc', 'vertex', 'y', '', '', '')
    vtx_z_col = ('slc', 'vertex', 'z', '', '', '')
    _plot(all_frames, vtx_x_col, "Slice Vertex X [cm]", -300, 300, 60, "_slc_vtx_x", is_logx, is_logy)
    _plot(all_frames, vtx_y_col, "Slice Vertex Y [cm]", -300, 300, 60, "_slc_vtx_y", is_logx, is_logy)
    _plot(all_frames, vtx_z_col, "Slice Vertex Z [cm]", -100, 600, 70, "_slc_vtx_z", is_logx, is_logy)

    # Split all four samples the same way so that each East/West plot compares
    # like with like (the low-threshold MC and off-beam frames were previously
    # swapped between the West-Y and East-Z plots).
    east_frames = tuple(_tpc_half(df, True) for df in all_frames)
    west_frames = tuple(_tpc_half(df, False) for df in all_frames)

    _plot(east_frames, vtx_y_col, "East TPC Slice Vertex Y [cm]", -300, 300, 60, "_slc_vtx_y_east", is_logx, is_logy)
    _plot(west_frames, vtx_y_col, "West TPC Slice Vertex Y [cm]", -300, 300, 60, "_slc_vtx_y_west", is_logx, is_logy)

    _plot(east_frames, vtx_z_col, "East TPC Slice Vertex Z [cm]", -100, 600, 70, "_slc_vtx_z_east", is_logx, is_logy)
    _plot(west_frames, vtx_z_col, "West TPC Slice Vertex Z [cm]", -100, 600, 70, "_slc_vtx_z_west", is_logx, is_logy)

    ## -- 3) Check longest and second longest track variables
    ranked = [_longest_two(df) for df in all_frames]
    long_frames = tuple(pair[0] for pair in ranked)
    short_frames = tuple(pair[1] for pair in ranked)

    trk_score_col = ('pfp', 'trackScore', '', '', '', '')
    _plot(long_frames, trk_score_col, "Longest Track Track Score", 0, 1., 50, "_trk_score_long", False, False)
    _plot(short_frames, trk_score_col, "Second Longest Track Track Score", 0, 1., 50, "_trk_score_short", False, False)

    chi2_muon_col = ('pfp', 'trk', 'chi2pid', 'I2', 'chi2_muon', '')
    chi2_proton_col = ('pfp', 'trk', 'chi2pid', 'I2', 'chi2_proton', '')
    _plot(long_frames, chi2_muon_col, r"$\mathrm{Longest~Track~\chi^2_{\mu}}$", 0, 70., 50, "_chi2pid_muon_long", False, False)
    _plot(long_frames, chi2_proton_col, r"$\mathrm{Longest~Track~\chi^2_{p}}$", 0, 400., 50, "_chi2pid_proton_long", False, False)

    _plot(short_frames, chi2_muon_col, r"$\mathrm{Second~Longest~Track~\chi^2_{\mu}}$", 0, 70., 50, "_chi2pid_muon_short", False, False)
    _plot(short_frames, chi2_proton_col, r"$\mathrm{Second~Longest~Track~\chi^2_{p}}$", 0, 400., 50, "_chi2pid_proton_short", False, False)

    trk_dir_x_col = ('pfp', 'trk', 'dir', 'x', '', '')
    trk_dir_y_col = ('pfp', 'trk', 'dir', 'y', '', '')
    trk_dir_z_col = ('pfp', 'trk', 'dir', 'z', '', '')

    _plot(long_frames, trk_dir_x_col, "Longest Track Dir. X", -1.2, 1.2, 60, "_trk_dir_x_long")
    _plot(long_frames, trk_dir_y_col, "Longest Track Dir. Y", -1.2, 1.2, 60, "_trk_dir_y_long")
    _plot(long_frames, trk_dir_z_col, "Longest Track Dir. Z", -1.2, 1.2, 60, "_trk_dir_z_long")
    _plot(long_frames, trk_dir_z_col, "Longest Track Dir. Z", -1.2, 1.2, 60, "_trk_dir_z_long_logy", False, True)

    _plot(short_frames, trk_dir_y_col, "Second Longest Track Dir. Y", -1.2, 1.2, 60, "_trk_dir_y_short")
    _plot(short_frames, trk_dir_z_col, "Second Longest Track Dir. Z", -1.2, 1.2, 60, "_trk_dir_z_short_logy", False, True)

    # Track length is always drawn on a log x axis
    _plot(long_frames, trk_len_col, "Longest Track Length [cm]", 0.5, 800., 80, "_trk_len_long", True, False)
    _plot(short_frames, trk_len_col, "Second Longest Track Length [cm]", 0.5, 800, 80, "_trk_len_short", True, False)

    # Track start / end position per coordinate: (axis, x_min, x_max, n_bins)
    for axis, lo, hi, n_bins in (("x", -300, 300, 120), ("y", -300, 300, 120), ("z", -100, 600, 70)):
        start_col = ('pfp', 'trk', 'start', axis, '', '')
        end_col = ('pfp', 'trk', 'end', axis, '', '')
        axis_name = axis.upper()

        _plot(long_frames, start_col, f"Longest Track Start {axis_name} [cm]", lo, hi, n_bins, f"_trk_start_{axis}_long", is_logx, is_logy)
        _plot(long_frames, end_col, f"Longest Track End {axis_name} [cm]", lo, hi, n_bins, f"_trk_end_{axis}_long", is_logx, is_logy)
        _plot(short_frames, start_col, f"Second Longest Track Start {axis_name} [cm]", lo, hi, n_bins, f"_trk_start_{axis}_short", is_logx, is_logy)
        _plot(short_frames, end_col, f"Second Longest Track End {axis_name} [cm]", lo, hi, n_bins, f"_trk_end_{axis}_short", is_logx, is_logy)

    #trk_x_ranges = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100), (100, 120), (120, 140), (140, 160), (160, 180), (180, 200)]
    #for lo, hi in trk_x_ranges:
    #    label = f"{int(lo)}_{int(hi)}"
    #    shorttrk_mc_cut = shorttrk_mc[(shorttrk_mc[trk_start_x_col] >= lo) & (shorttrk_mc[trk_start_x_col] < hi)]
    #    shorttrk_intime_mc_cut = shorttrk_intime_mc[(shorttrk_intime_mc[trk_start_x_col] >= lo) & (shorttrk_intime_mc[trk_start_x_col] < hi)]
    #    shorttrk_data_cut = shorttrk_data[(shorttrk_data[trk_start_x_col] >= lo) & (shorttrk_data[trk_start_x_col] < hi)]

    #    draw_mc_data_shape_comp_per_slc(shorttrk_mc_cut, shorttrk_intime_mc_cut, shorttrk_data_cut, chi2_muon_col, f"Second Longest Track Chi2_mu (Track Start X: {lo}–{hi} cm)", "A.U.", 0, 70., 35, suffix + f"_chi2pid_muon_short_start_x_{label}", is_logx, is_logy)



    ## -- 4) more details for the longest track in slice
    #trk_len_ranges = [(0, 10), (10, 20), (20, 40), (40, 80), (80, 120), (120, 200), (200, 300), (300, 500), (500, 1000)]

    #for lo, hi in trk_len_ranges:
    #    label = f"{int(lo)}_{int(hi)}"
    
    #    longtrk_mc_cut = longtrk_mc[(longtrk_mc[trk_len_col] >= lo) & (longtrk_mc[trk_len_col] < hi)]
    #    longtrk_intime_mc_cut = longtrk_intime_mc[(longtrk_intime_mc[trk_len_col] >= lo) & (longtrk_intime_mc[trk_len_col] < hi)]
    #    longtrk_data_cut = longtrk_data[(longtrk_data[trk_len_col] >= lo) & (longtrk_data[trk_len_col] < hi)]

    #    draw_mc_data_shape_comp_per_slc(longtrk_mc_cut, longtrk_intime_mc_cut, longtrk_data_cut, trk_dir_x_col, f"Longest Track Dir. X (Track Length: {lo}–{hi} cm)", "A.U.", -1.2, 1.2, 60, suffix + f"_trk_dir_x_long_len_{label}", is_logx, is_logy)
    #    draw_mc_data_shape_comp_per_slc(longtrk_mc_cut, longtrk_intime_mc_cut, longtrk_data_cut, trk_dir_y_col, f"Longest Track Dir. Y (Track Length: {lo}–{hi} cm)", "A.U.", -1.2, 1.2, 60, suffix + f"_trk_dir_y_long_len_{label}", is_logx, is_logy)
    #    draw_mc_data_shape_comp_per_slc(longtrk_mc_cut, longtrk_intime_mc_cut, longtrk_data_cut, trk_dir_z_col, f"Longest Track Dir. Z (Track Length: {lo}–{hi} cm)", "A.U.", -1.2, 1.2, 60, suffix + f"_trk_dir_z_long_len_{label}", is_logx, is_logy)
    #    draw_mc_data_shape_comp_per_slc(longtrk_mc_cut, longtrk_intime_mc_cut, longtrk_data_cut, trk_score_col, f"Longest Track Track Score (Track Length: {lo}–{hi} cm)", "A.U.", -1.2, 1.2, 60, suffix + f"_trk_score_long_len_{label}", is_logx, is_logy)



### For drawing data blind plots: this function does not draw data points in bins with signal contribution greater than 10%
def draw_reco_stacked_hist_blind(
    var_mc, var_mc_low_th, var_offbeam_data, is_logx, is_logy,
    title_x, title_y, x_min, x_max, nbins, outname,
    data_overlay=False, var_data=None, draw_density=False,
    blind=True, blind_req=0.1
):
    """Blinded variant of the stacked MC histogram with data overlay and ratio pad.

    The three input lists are concatenated in the given order, weighted by the
    notebook-level ``mc_pot_scale``, ``mc_low_th_scale`` and
    ``intime_gate_scale``, and drawn as one stack. When ``blind`` is set, data
    points are hidden in every bin where the first stacked component (the
    "Signal" entry of ``mode_labels``) makes up more than ``blind_req`` of the
    total MC. The data count quoted in the legend then covers the unblinded
    bins only, so it cannot be used to infer the blinded yield. The figure is
    shown and closed; nothing is returned.

    Besides the scale factors, the function reads the notebook globals
    ``colors``, ``mode_labels``, ``target_pot`` and ``sample_str`` and calls
    ``sh.return_data_stat_err``.

    Parameters
    ----------
    var_mc, var_mc_low_th, var_offbeam_data : list of array-like
        Values to histogram, one array per stacked component; ``var_mc[0]`` is
        treated as the signal.
    is_logx, is_logy : bool
        Use a logarithmic x axis (with log-spaced bins) / y axis.
    title_x, title_y : str
        Axis titles. ``title_y`` is replaced when ``data_overlay`` is set.
    x_min, x_max : float
        Histogram range. ``x_min`` must be positive when ``is_logx`` is set.
    nbins : int
        Number of bins.
    outname : str
        Output file stem. Currently unused because the ``savefig`` call is
        commented out.
    data_overlay : bool, optional
        Draw ``var_data`` as points with error bars and fill the ratio pad.
    var_data : array-like, optional
        Data values; ``None`` is treated as an empty sample.
    draw_density : bool, optional
        Normalise MC and data to unit area instead of drawing event counts.
    blind : bool, optional
        Hide the data in signal-rich bins.
    blind_req : float, optional
        Signal fraction above which a bin is blinded.
    """
    if var_data is None:
        var_data = []

    fig = plt.figure(figsize=(8, 8), dpi=100)
    gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1], hspace=0.10)
    ax_main = fig.add_subplot(gs[0])
    ax_ratio = fig.add_subplot(gs[1], sharex=ax_main)

    if is_logx:
        ax_main.set_xscale('log')
        ax_ratio.set_xscale('log')
    if is_logy:
        ax_main.set_yscale('log')

    ax_main.set_xlabel("")
    ax_main.set_ylabel(title_y)
    ax_ratio.set_ylabel("Data/MC", fontsize=12)
    ax_ratio.set_xlabel(title_x, fontsize=12)
    ax_ratio.axhline(1.0, color='red', linestyle='--', linewidth=1)
    ax_ratio.set_ylim(0.4, 1.6)
    ax_ratio.tick_params(width=2, length=6)
    for spine in ax_ratio.spines.values():
        spine.set_linewidth(2)

    plt.setp(ax_main.get_xticklabels(), visible=False)

    # Log-spaced bins (geometric centres) on a log axis, linear bins otherwise
    bins = np.logspace(np.log10(x_min), np.log10(x_max), nbins + 1) if is_logx else np.linspace(x_min, x_max, nbins + 1)
    bin_centers = np.sqrt(bins[:-1] * bins[1:]) if is_logx else 0.5 * (bins[:-1] + bins[1:])

    # One constant weight per entry, set by the sample the entry comes from
    all_mc_data = var_mc + var_mc_low_th + var_offbeam_data
    all_weights = (
        [np.ones_like(data) * mc_pot_scale for data in var_mc] +
        [np.ones_like(data) * mc_low_th_scale for data in var_mc_low_th] +
        [np.ones_like(data) * intime_gate_scale for data in var_offbeam_data]
    )

    hist_data, bins, _ = ax_main.hist(
        all_mc_data, bins=bins, weights=all_weights,
        stacked=True, color=colors, label=mode_labels,
        edgecolor='none', linewidth=0, density=draw_density, histtype='stepfilled'
    )
    bin_widths = np.diff(bins)

    # Per-bin MC yield and sum of squared weights (for the statistical error)
    each_mc_hist_data = []
    each_mc_hist_err2 = []
    for data, w in zip(all_mc_data, all_weights):
        hist_vals, _ = np.histogram(data, bins=bins, weights=w)
        hist_err2, _ = np.histogram(data, bins=bins, weights=np.square(w))
        each_mc_hist_data.append(hist_vals)
        each_mc_hist_err2.append(hist_err2)

    # Raw event counts are kept separately: the blinding decision below must
    # not depend on the density normalisation.
    total_mc_counts = np.sum(each_mc_hist_data, axis=0)
    total_mc = total_mc_counts
    mc_stat_err = np.sqrt(np.sum(each_mc_hist_err2, axis=0))

    if draw_density:
        # A stacked density histogram is normalised so that the whole stack has
        # unit area; apply the same factor to the total and its error so the
        # uncertainty band, the y range and the ratio match what is drawn.
        mc_integral = np.sum(total_mc_counts)
        if mc_integral > 0:
            total_mc = total_mc_counts / (mc_integral * bin_widths)
            mc_stat_err = mc_stat_err / (mc_integral * bin_widths)

    max_y = np.max(total_mc)

    ax_main.bar(
        bin_centers, 2 * mc_stat_err,
        width=bin_widths, bottom=total_mc - mc_stat_err,
        facecolor='none', edgecolor='black', hatch='xxxx',
        linewidth=0.0, label='MC Stat. Unc.'
    )

    # Empty MC bins give 0/0: no band is drawn there and the band centre is
    # pushed far below the visible ratio range.
    mc_stat_err_ratio = np.nan_to_num(mc_stat_err / total_mc, nan=0.)
    mc_content_ratio = np.nan_to_num(total_mc / total_mc, nan=-999.)
    ax_ratio.bar(
        bin_centers, 2 * mc_stat_err_ratio,
        width=bin_widths, bottom=mc_content_ratio - mc_stat_err_ratio,
        facecolor='none', edgecolor='black', hatch='xxxx',
        linewidth=0.0, label='MC Stat. Unc.'
    )

    if data_overlay:
        ax_main.set_ylabel("A.U." if draw_density else f"Events (POT = {target_pot:.2e})")

        counts, _ = np.histogram(var_data, bins=bins)
        total_data = np.sum(counts)

        # Float copies: blinding overwrites entries in place and must not
        # modify `counts` (needed for the legend) or the helper's arrays.
        norm_counts = counts.astype(float)
        data_eylow, data_eyhigh = (np.array(err, dtype=float) for err in sh.return_data_stat_err(counts))
        if draw_density and total_data > 0:
            norm_counts = norm_counts / (total_data * bin_widths)
            data_eylow = data_eylow / (total_data * bin_widths)
            data_eyhigh = data_eyhigh / (total_data * bin_widths)
        elif draw_density:
            norm_counts[:] = 0
            data_eylow[:] = 0
            data_eyhigh[:] = 0

        ### --> Apply blinding to bins where Signal fraction > blind_req
        shown_data = total_data
        if blind:
            signal_mc = each_mc_hist_data[0]  # index 0 = "Signal"
            signal_frac = np.nan_to_num(signal_mc / total_mc_counts, nan=0.)
            blind_bins = signal_frac > blind_req
            # -1 puts the point below the visible range of both pads
            norm_counts[blind_bins] = -1
            data_eylow[blind_bins] = 0
            data_eyhigh[blind_bins] = 0
            # Quote only the data that is actually shown
            shown_data = np.sum(counts[~blind_bins])

        ax_main.errorbar(
            bin_centers, norm_counts,
            yerr=np.vstack((data_eylow, data_eyhigh)),
            fmt='o', color='black', label='Data',
            markersize=5, capsize=3, linewidth=1.5
        )

        max_y_data = np.max(norm_counts + data_eyhigh)
        max_y = max(max_y, max_y_data)

        data_ratio = np.nan_to_num(norm_counts / total_mc, nan=-1.)
        data_ratio_eylow = np.nan_to_num(data_eylow / total_mc, nan=0.)
        data_ratio_eyhigh = np.nan_to_num(data_eyhigh / total_mc, nan=0.)

        ax_ratio.errorbar(
            bin_centers, data_ratio,
            yerr=np.vstack((data_ratio_eylow, data_ratio_eyhigh)),
            fmt='o', color='black', label='Data',
            markersize=5, capsize=3, linewidth=1.5
        )

    ax_main.set_xlim(x_min, x_max)
    ax_main.set_ylim(0.1 if is_logy else 0.0, max_y * (600 if is_logy else 1.5))

    # hist_data holds the cumulative stack, so the yield of each component is
    # the difference between consecutive layers.
    accum_sum = [0.] + [np.sum(layer) for layer in hist_data]
    total_sum = accum_sum[-1]
    individual_sums = [upper - lower for lower, upper in zip(accum_sum[:-1], accum_sum[1:])]
    fractions = [(count / total_sum) * 100 for count in individual_sums]
    # Labels are passed without handles, so matplotlib pairs them with the
    # labelled artists by order.
    # NOTE(review): the reversed label order presumably mirrors the order in
    # which the stepfilled stack is added to the axes — re-check the legend
    # colours after a matplotlib upgrade.
    legend_labels = [f"{label} ({frac:.1f}%)" for label, frac in zip(mode_labels[::-1], fractions[::-1])]

    if data_overlay:
        # The MC uncertainty band comes before the data points in the legend,
        # so it must always get its own label; appending only "Data" would
        # attach that label to the band.
        if draw_density:
            legend_labels.append("MC Stat. Unc.")
            legend_labels.append("Data")
        else:
            legend_labels.append(f"Total MC Stat. Unc. ({total_sum:.0f})")
            legend_labels.append(f"Data ({shown_data:.0f})")

    ax_main.legend(legend_labels, loc='upper left', fontsize=10, frameon=False, ncol=3, bbox_to_anchor=(0.05, 0.98))
    ax_ratio.legend(["y=1", "MC (Stat. Only)", "Data/MC"], loc='upper left', fontsize=7, frameon=False, ncol=3, bbox_to_anchor=(0.05, 0.98))

    ax_main.text(0.00, 1.02, "SBND " + sample_str + ", Preliminary",
                 transform=ax_main.transAxes, fontsize=14, fontweight='bold')

    #fig.savefig(f"./plots/pandora_df/2025_v10_06_00_05/data_vs_mc_tpc/{outname}.pdf", format='pdf', bbox_inches='tight')
    plt.show()
    plt.close()

def draw_mc_data_shape_comp_per_slc_blind(mc_bnb_cosmic_df, mc_low_th_df, data_offbeam_df, data_df, column, x_title, y_title, x_min, x_max, n_bins, out_name, is_logx = False, is_logy = False, blind = True, blind_req = 0.1):
    """Build per-slice series of `column` and hand them to `draw_reco_stacked_hist_blind`.

    Every input frame is reduced to one row per (`__ntuple`, `entry`,
    `rec.slc..index`) group with `GroupBy.first()`. The reduced frames are
    then split by the `('gen', 'nuint_categ', ...)` category column.

    Parameters
    ----------
    mc_bnb_cosmic_df : DataFrame
        Yields one series per entry of the module-level `mode_list`,
        skipping categories -3 and -4.
    mc_low_th_df : DataFrame
        Only slices with `nuint_categ == -4` are kept.
    data_offbeam_df : DataFrame
        Only slices with `nuint_categ == -3` are kept.
    data_df : DataFrame
        `column` is taken for every slice, with no category filter.
    column
        Column key of the quantity to histogram.
    x_title, y_title, x_min, x_max, n_bins, out_name, is_logx, is_logy, blind, blind_req
        Forwarded unchanged to `draw_reco_stacked_hist_blind`.
    """
    categ_key = ('gen', 'nuint_categ', '', '', '', '')

    def _first_per_slc(frame, keep_cols):
        # Collapse to one row per slice, keeping only the requested columns.
        slc_levels = ['__ntuple', 'entry', 'rec.slc..index']
        return frame.groupby(slc_levels)[keep_cols].first()

    bnb_slc = _first_per_slc(mc_bnb_cosmic_df, [column, categ_key])
    low_th_slc = _first_per_slc(mc_low_th_df, [column, categ_key])
    offbeam_slc = _first_per_slc(data_offbeam_df, [column, categ_key])
    data_slc = _first_per_slc(data_df, [column])

    # One series per category from the BNB+cosmic MC; -3 and -4 are taken
    # from the dedicated off-beam and low-threshold samples below instead.
    var_mc_bnb_cosmic = []
    for mode in mode_list:
        if mode == -3 or mode == -4:
            continue
        in_mode = bnb_slc[categ_key] == mode
        var_mc_bnb_cosmic.append(bnb_slc[in_mode][column])

    low_th_mask = low_th_slc.gen.nuint_categ == -4
    var_mc_low_th = [low_th_slc[low_th_mask][column]]

    offbeam_mask = offbeam_slc.gen.nuint_categ == -3
    var_data_offbeam = [offbeam_slc[offbeam_mask][column]]

    var_data = data_slc[column]

    # The literal True / False are positional flags of the drawing helper
    # (presumably data-overlay on, density off -- confirm against its signature).
    draw_reco_stacked_hist_blind(
        var_mc_bnb_cosmic, var_mc_low_th, var_data_offbeam,
        is_logx, is_logy,
        x_title, y_title, x_min, x_max, n_bins, out_name,
        True, var_data, False,
        blind, blind_req,
    )

### Perform evt selections and plot

#### No cut

In [None]:
# Validation plots for the four samples at the "No cut" stage; the trailing
# string is presumably the output-name tag (confirm in draw_reco_valid_plots).
draw_reco_valid_plots(
    mc_bnb_cosmic_dfs["evt"],
    mc_rockbox_th1to100_dfs["evt"],
    data_offbeam_light_dfs["evt"],
    data_bnb_light_dfs["evt"],
    "a_a_no_cut",
)

#### PE cut

In [None]:
# Keep only the rows whose `pemask` entry is truthy, for every sample.
# The "evt" frame is overwritten in place so later cells see the cut applied.
for _sel_dfs in (
    mc_bnb_cosmic_dfs,
    mc_rockbox_th1to100_dfs,
    data_offbeam_light_dfs,
    data_bnb_light_dfs,
):
    _sel_dfs["evt"] = _sel_dfs["evt"][_sel_dfs["evt"].pemask]
del _sel_dfs

In [None]:
# Validation plots for the four samples after the PE cut.
draw_reco_valid_plots(
    mc_bnb_cosmic_dfs["evt"],
    mc_rockbox_th1to100_dfs["evt"],
    data_offbeam_light_dfs["evt"],
    data_bnb_light_dfs["evt"],
    "a_a_b_pe_cut",
)

#### Vertex in fiducial volume (FV)

In [None]:
# Keep only the rows for which InFV_nohiyz accepts the slice vertex, for every sample.
# The "evt" frame is overwritten in place so later cells see the cut applied.
for _sel_dfs in (
    mc_bnb_cosmic_dfs,
    mc_rockbox_th1to100_dfs,
    data_offbeam_light_dfs,
    data_bnb_light_dfs,
):
    _sel_dfs["evt"] = _sel_dfs["evt"][InFV_nohiyz(_sel_dfs["evt"].slc.vertex)]
del _sel_dfs

In [None]:
# Validation plots for the four samples after the vertex-in-FV cut.
draw_reco_valid_plots(
    mc_bnb_cosmic_dfs["evt"],
    mc_rockbox_th1to100_dfs["evt"],
    data_offbeam_light_dfs["evt"],
    data_bnb_light_dfs["evt"],
    "a_b_vtxfvcut",
)

#### Reject clear cosmics (keep `is_clear_cosmic == 0`)

In [None]:
# Keep only the slices with `slc.is_clear_cosmic == 0`, for every sample.
# The "evt" frame is overwritten in place so later cells see the cut applied.
for _sel_dfs in (
    mc_bnb_cosmic_dfs,
    mc_rockbox_th1to100_dfs,
    data_offbeam_light_dfs,
    data_bnb_light_dfs,
):
    _sel_dfs["evt"] = _sel_dfs["evt"][_sel_dfs["evt"].slc.is_clear_cosmic == 0]
del _sel_dfs

In [None]:
# Validation plots for the four samples after the clear-cosmic rejection.
draw_reco_valid_plots(
    mc_bnb_cosmic_dfs["evt"],
    mc_rockbox_th1to100_dfs["evt"],
    data_offbeam_light_dfs["evt"],
    data_bnb_light_dfs["evt"],
    "a_vtx_fv_and_not_clear_cosmic",
)