In [None]:
import ast
import json
import os
import re
import subprocess
import sys
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import uproot

print("uproot version: ", uproot.__version__)

# Put the parent of the current working directory on sys.path so that the
# project-local `src` package can be imported from this notebook.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.file_locations import data_files_location


In [None]:
# Number of leading tree entries to read from every branch below.
num_events = 1000

# Every branch is read eagerly into a NumPy array, so the ROOT file can be
# closed as soon as the reads are done; the context manager guarantees the
# handle is released even if one of the reads raises.
# NOTE(review): the arrays come from different trees (nuselection vs.
# wcpselection) and are later combined element-wise, which assumes that both
# trees store the same events in the same order - confirm.
with uproot.open(data_files_location + "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_nu_overlay_surprise_reco2_hist_4c.root") as f:
    weight_spline_times_tune = f["nuselection"]["NeutrinoSelectionFilter"]["weightSplineTimesTune"].array(library="np", entry_stop=num_events)
    weight_cv = f["wcpselection"]["T_eval"]["weight_cv"].array(library="np", entry_stop=num_events)
    weight_spline = f["wcpselection"]["T_eval"]["weight_spline"].array(library="np", entry_stop=num_events)

    wc_kine_reco_Enus = f["wcpselection"]["T_KINEvars"]["kine_reco_Enu"].array(library="np", entry_stop=num_events)

# Element-wise product of the two T_eval weight branches.
weight_cv_times_spline = weight_cv * weight_spline


In [None]:
def _cxx_escape(s):
    """Escape a string so it can be embedded in a C++ double-quoted literal.

    Backslashes are doubled first and double quotes are backslash-escaped
    afterwards; in the opposite order the backslashes introduced for the
    quotes would themselves be doubled.
    """
    return s.replace('\\', '\\\\').replace('"', '\\"')


# Matches either a complete double-quoted string (returned untouched, so that
# map keys containing "inf"/"nan" are never rewritten) or a bare non-finite
# token as printed by C++ iostreams (nan, -nan, inf, -inf).
_QUOTED_OR_NONFINITE = re.compile(
    r'"(?:[^"\\]|\\.)*"|[-+]?(?:nan|inf(?:inity)?)\b',
    re.IGNORECASE,
)


def _nonfinite_to_json(match):
    """Map one regex match to the spelling that ``json.loads`` accepts."""
    token = match.group(0)
    if token.startswith('"'):
        return token
    lowered = token.lower()
    if "nan" in lowered:
        # The sign of a NaN carries no information; JSON has no "-NaN".
        return "NaN"
    return "-Infinity" if lowered.startswith("-") else "Infinity"


def _parse_weight_line(line):
    """Parse one line of macro output into a ``{name: [weights, ...]}`` dict.

    Raises:
        ValueError: if the line is not a well-formed object. Skipping such a
            line would shift every later row relative to its tree entry.
    """
    normalised = _QUOTED_OR_NONFINITE.sub(_nonfinite_to_json, line)
    try:
        parsed = json.loads(normalised)
    except ValueError as exc:
        raise ValueError(f"Could not parse weight line: {line[:200]!r}") from exc
    if not isinstance(parsed, dict):
        raise ValueError(f"Expected one object per entry, got: {line[:200]!r}")
    return parsed


def get_all_event_weights(
    file_path: str,
    tree_path: str = "nuselection/NeutrinoSelectionFilter",
    branch_name: str = "weights",
    max_entries: int = -1,
    root_bin: str = "root"):
    """Read a ``map<string, vector<double>>`` branch by running a ROOT macro.

    The C++ macro below is written to a temporary directory and executed with
    ``root_bin -l -b -q``. It prints one ``{"name": [w0, w1, ...], ...}`` line
    per tree entry between ``__BEGIN__`` and ``__END__`` markers; those lines
    are parsed here.

    Args:
        file_path: Path of the ROOT file; made absolute before being passed on.
        tree_path: Path of the TTree inside the file.
        branch_name: Name of the branch holding the weight map.
        max_entries: Maximum number of entries to read; a negative value reads
            all entries.
        root_bin: Name or path of the ROOT executable.

    Returns:
        A list with one dict per processed entry, mapping each map key to its
        list of float weights. Entries that could not be read yield ``{}``.
        Non-finite weights are returned as ``float('nan')`` / ``float('inf')``.

    Raises:
        RuntimeError: if the begin/end markers are missing from the macro's
            stdout. When the macro printed an ``__ERROR__`` line, its text
            leads the message.
        ValueError: if a line between the markers is not a valid entry.
    """
    cpp_macro = r'''
#include <TFile.h>
#include <TTree.h>
#include <TBranch.h>
#include <TSystem.h>
#include <TROOT.h>
#include <TInterpreter.h> // Header needed for gInterpreter
#include <vector>
#include <string>
#include <map>
#include <iostream>
#include <iomanip>

// Define the exact type expected by the branch (confirmed as double by your last code)
using WeightMap_t = std::map<std::string, std::vector<double>>;

void extract_event_weights(const char* filePath, const char* treePath, const char* branchName, Long64_t maxEntries=-1) {
    
    gInterpreter->GenerateDictionary("map<string,vector<double>>", "map;string;vector");
    
    TFile* f = TFile::Open(filePath);
    if (!f || f->IsZombie()) { std::cout << "__ERROR__ Cannot open file\n"; return; }

    TObject* obj = f->Get(treePath);
    TTree* t = dynamic_cast<TTree*>(obj);
    if (!t) { std::cout << "__ERROR__ Tree not found at path\n"; f->Close(); return; }
    
    TBranch* br = t->GetBranch(branchName);
    if (!br) { std::cout << "__ERROR__ Branch not found\n"; f->Close(); return; }

    WeightMap_t* weight_map_ptr = nullptr;
    // This SetBranchAddress call should now succeed
    t->SetBranchAddress(branchName, &weight_map_ptr);

    Long64_t nentries = t->GetEntries();
    if (maxEntries >= 0 && maxEntries < nentries) nentries = maxEntries;

    std::cout << "__BEGIN__\n";
    for (Long64_t i = 0; i < nentries; ++i) {
        
        if (t->GetEntry(i) <= 0) { 
            std::cout << "{}\n";
            continue;
        }

        if (!weight_map_ptr) {
            std::cout << "{}\n";
            continue;
        }

        const auto& weight_map = *weight_map_ptr;

        std::cout << "{";
        
        bool first_pair = true;
        for (const auto& pair : weight_map) {
            if (!first_pair) {
                std::cout << ", ";
            }
            
            const std::string& key = pair.first;
            const std::vector<double>& weights = pair.second;

            std::cout << "\"" << key << "\": [";
            
            for (size_t j = 0; j < weights.size(); ++j) {
                if (j) std::cout << ",";
                std::cout << std::scientific << weights.at(j); 
            }
            std::cout << "]";
            
            first_pair = false;
        }
        
        std::cout << "}\n";
    }
    std::cout << "__END__\n";
    f->Close();
}
'''
    begin = "__BEGIN__"
    end = "__END__"
    error_marker = "__ERROR__"

    # The arguments are spliced into a C++ call expression, so they must be
    # valid string-literal contents.
    file_path_cxx = _cxx_escape(os.path.abspath(file_path))
    tree_path_cxx = _cxx_escape(tree_path)
    branch_name_cxx = _cxx_escape(branch_name)

    # Only the ROOT process needs the macro file; parsing happens after the
    # temporary directory has been removed.
    with tempfile.TemporaryDirectory() as td:
        macro_path = os.path.join(td, "extract_event_weights.C")
        with open(macro_path, "w") as macro_file:
            macro_file.write(cpp_macro)

        arg_expr = f'{macro_path}("{file_path_cxx}","{tree_path_cxx}","{branch_name_cxx}",{int(max_entries)})'
        cmd = [root_bin, "-l", "-b", "-q", arg_expr]

        proc = subprocess.run(
            cmd,
            text=True,
            capture_output=True,
            env=os.environ.copy()  # Keeps the path for the 'root' executable
        )

    # The macro writes its markers and payload to std::cout only.
    stdout = proc.stdout or ""

    if begin not in stdout or end not in stdout:
        # Surface the macro's own diagnostic when it printed one.
        reported = [
            out_line.split(error_marker, 1)[1].strip()
            for out_line in stdout.splitlines()
            if error_marker in out_line
        ]
        reason = (
            f"ROOT macro reported: {reported[0]}"
            if reported
            else "Failed to parse ROOT output or macro crashed."
        )
        raise RuntimeError(f"{reason}\nReturn code: {proc.returncode}\n--- stdout ---\n{proc.stdout}\n--- stderr ---\n{proc.stderr}")

    payload = stdout.split(begin, 1)[1].split(end, 1)[0]

    rows = []
    for raw_line in payload.strip().splitlines():
        line = raw_line.strip()
        if not line:
            continue
        rows.append(_parse_weight_line(line))

    return rows

In [None]:
# Run the ROOT-based extraction on the overlay file, limited to the same
# number of entries as the arrays read with uproot.
overlay_weights_file = data_files_location + "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_nu_overlay_surprise_reco2_hist_4c.root"
all_event_weights = get_all_event_weights(file_path=overlay_weights_file, max_entries=num_events)


In [None]:
# Quick look at the extraction result: number of events, then for the first
# event each weight name with its number of weights and the leading values.
print(f"len(all_event_weights): {len(all_event_weights)}")

first_event_weights = all_event_weights[0]
for weight_name in first_event_weights:
    universe_values = first_event_weights[weight_name]
    print(f"{weight_name}: {len(universe_values)} weights, first 5: {universe_values[:5]}")

In [None]:
def _stack_universe_weights(weight_name):
    """Collect the ``weight_name`` list of every event into one NumPy array."""
    per_event_lists = []
    for event in all_event_weights:
        per_event_lists.append(event[weight_name])
    return np.array(per_event_lists)


# One array per systematic knob; rows follow the order of all_event_weights.
weights_All_UBGenie = _stack_universe_weights("All_UBGenie")
weights_AxFFCCQEshape = _stack_universe_weights("AxFFCCQEshape_UBGenie")
weights_DecayAngMEC = _stack_universe_weights("DecayAngMEC_UBGenie")
weights_NormCCCOH = _stack_universe_weights("NormCCCOH_UBGenie")
weights_NormNCCOH = _stack_universe_weights("NormNCCOH_UBGenie")
weights_RPA_CCQE = _stack_universe_weights("RPA_CCQE_UBGenie")
weights_ThetaDelta2NRad = _stack_universe_weights("ThetaDelta2NRad_UBGenie")
weights_Theta_Delta2Npi = _stack_universe_weights("Theta_Delta2Npi_UBGenie")
weights_VecFFCCQEshape = _stack_universe_weights("VecFFCCQEshape_UBGenie")
weights_XSecShape_CCMEC = _stack_universe_weights("XSecShape_CCMEC_UBGenie")
weights_flux_all = _stack_universe_weights("flux_all")
weights_reint_all = _stack_universe_weights("reint_all")
weights_xsr_scc_Fa3_SCC = _stack_universe_weights("xsr_scc_Fa3_SCC")
weights_xsr_scc_Fv3_SCC = _stack_universe_weights("xsr_scc_Fv3_SCC")


In [None]:
# Eleven edges from 0 to 2000, i.e. ten equal-width bins, shared by every figure.
bins = np.linspace(0, 2000, 11)


def plot_universe_variations(universe_weights, base_weights, label, max_universes=10):
    """Draw the CV histogram together with the first few universe histograms.

    Args:
        universe_weights: 2-D array indexed as ``[event, universe]``.
        base_weights: Per-event weight multiplied onto every universe column.
        label: Legend prefix; each universe is labelled ``"<label> uni <i>"``.
        max_universes: Upper limit on the number of universes drawn.
    """
    plt.figure()
    # Reference histogram, identical in every figure.
    plt.hist(wc_kine_reco_Enus, weights=weight_spline_times_tune, bins=bins, histtype="step", label="CV", color="k", lw=2)
    for uni_i in range(min(max_universes, universe_weights.shape[1])):
        curr_event_weights = universe_weights[:, uni_i] * base_weights
        plt.hist(wc_kine_reco_Enus, weights=curr_event_weights, bins=bins, histtype="step", label=f"{label} uni {uni_i}")
    plt.xlabel("wc_kine_reco_Enu")
    plt.ylabel("weighted event count")
    plt.legend()
    plt.show()


# (universe weights, per-event base weight, legend label), in plotting order.
# The GENIE knobs are multiplied by weight_spline only, while the flux,
# reinteraction and xsr_scc weights are multiplied by the full CV weight
# (weight_spline_times_tune).
# NOTE(review): presumably the GENIE universe weights already contain the
# tune factor - confirm.
systematic_variations = [
    (weights_All_UBGenie, weight_spline, "GENIE_All"),
    (weights_AxFFCCQEshape, weight_spline, "AxFFCCQEshape"),
    (weights_DecayAngMEC, weight_spline, "DecayAngMEC"),
    (weights_NormCCCOH, weight_spline, "NormCCCOH"),
    (weights_NormNCCOH, weight_spline, "NormNCCOH"),
    (weights_RPA_CCQE, weight_spline, "RPA_CCQE"),
    (weights_ThetaDelta2NRad, weight_spline, "ThetaDelta2NRad"),
    (weights_Theta_Delta2Npi, weight_spline, "Theta_Delta2Npi"),
    (weights_VecFFCCQEshape, weight_spline, "VecFFCCQEshape"),
    (weights_XSecShape_CCMEC, weight_spline, "XSecShape_CCMEC"),
    (weights_flux_all, weight_spline_times_tune, "flux_all"),
    (weights_reint_all, weight_spline_times_tune, "reint_all"),
    (weights_xsr_scc_Fa3_SCC, weight_spline_times_tune, "xsr_scc_Fa3_SCC"),
    (weights_xsr_scc_Fv3_SCC, weight_spline_times_tune, "xsr_scc_Fv3_SCC"),
]

for variation_weights, variation_base, variation_label in systematic_variations:
    plot_universe_variations(variation_weights, variation_base, variation_label)
