In [1]:
#import uproot
import pandas as pd
import numpy as np
import uproot3 as uproot
from tqdm import tqdm

# record the version of uproot3 (imported above under the name `uproot`) used for this run
print(uproot.__version__)

3.14.4


In [2]:
# Input directories. The trailing "/" is kept on purpose: file names are
# appended to these with plain string concatenation.
glee_file_location = (
    "/data1/hagaman/"
    "glee_files/SBNfit_files/"
)
# WC checkout files, training events removed
wc_file_location = (
    "/data1/hagaman/"
    "xin_files/processed_checkout_rootfiles/"
)


In [3]:
# Variables to extract from the glee files.
glee_vars = [
    # event identifiers
    "run_number", "subrun_number", "event_number",
    # reco_* shower energies (one per plane, plus the max)
    "reco_shower_energy_plane0", "reco_shower_energy_plane1", "reco_shower_energy_plane2",
    "reco_shower_energy_max",
    # reco_* vertex
    "reco_vertex_x", "reco_vertex_y", "reco_vertex_z",
    # mctruth_* neutrino vertex and energy
    "mctruth_nu_vertex_x", "mctruth_nu_vertex_y", "mctruth_nu_vertex_z",
    "mctruth_nu_E",
]

# Variables to extract from the WC files.
wc_vars = [
    "data_or_pred",
    "run", "subrun", "event",
    "category", "WC_file", "match_isFC",
    "kine_reco_Enu", "reco_showerKE", "nc_delta_score",
    "WC_reco_num_protons", "WC_reco_num_other_tracks",
    "reco_nuvtxX", "reco_nuvtxY", "reco_nuvtxZ",
    "truth_vtxX", "truth_vtxY", "truth_vtxZ", "truth_nuEnergy",
    "net_weight",
]

# Variables to extract for the WC *data* case: identical to wc_vars (same order)
# except that "category", the truth_* entries and "net_weight" are left out.
# NOTE(review): presumably because real data carries no truth/weight information — confirm.
wc_data_vars = [
    name
    for name in wc_vars
    if name not in {
        "category",
        "truth_vtxX", "truth_vtxY", "truth_vtxZ", "truth_nuEnergy",
        "net_weight",
    }
]



In [4]:
# Open the glee ROOT files.

# directory of the 1g0p selection, directly below the glee base directory
glee_1g0p_location = "".join([glee_file_location, "1g0p/"])

# keep only the "singlephoton" object of the file; the trees are read from it
f_glee_1g0p_CC1Pi0 = uproot.open(
    glee_1g0p_location + "sbnfit_1g0pMar2020_stage_4_CC1Pi0.root"
)["singlephoton"]


In [5]:
# Turn the glee ROOT trees into pandas dataframes.

# 1g0p pred files

# Read the glee_vars branches from vertex_tree and simple_pot_weight from
# simple_tree, place them side by side (axis=1), then tag every row with the
# file and selection it came from.
glee_1g0p_CC1Pi0_df = pd.concat(
    [
        f_glee_1g0p_CC1Pi0[tree_name].pandas.df(branches, flatten=False)
        for tree_name, branches in (
            ("vertex_tree", glee_vars),
            ("simple_tree", ["simple_pot_weight"]),
        )
    ],
    axis=1,
    sort=False,
).assign(glee_file="CC1Pi0", glee_selection="1g0p")

# the ROOT object is no longer needed once its contents are in the dataframe
del f_glee_1g0p_CC1Pi0



In [6]:
# Branch names to load from the WC files, one list per group of variables.

bdt_vars = ["nc_delta_score"]

extra_variables = [
    "run", "subrun", "event",
    "nuvtx_diff", "showervtx_diff", "muonvtx_diff",
    "truth_isCC", "truth_vtxInside", "truth_nuPdg",
    "truth_nuEnergy", "truth_nuIntType", "truth_energyInside",
    "weight_spline", "weight_cv", "weight_lee",
    "event_type",
    "weight", "lowEweight",
]

kine_scalar_vars = [
    "kine_reco_add_energy",
    "kine_pio_mass", "kine_pio_flag", "kine_pio_vtx_dis",
    # the same four quantities for the entries suffixed _1 and _2
    "kine_pio_energy_1", "kine_pio_theta_1", "kine_pio_phi_1", "kine_pio_dis_1",
    "kine_pio_energy_2", "kine_pio_theta_2", "kine_pio_phi_2", "kine_pio_dis_2",
    "kine_pio_angle",
]

kine_vector_vars = [
    "kine_energy_particle", "kine_energy_info",
    "kine_particle_type", "kine_energy_included",
]

# eval variables available for both data and MC
eval_data_variables = ["run", "subrun", "event", "flash_time"]

# the MC list starts with the data list; everything added here only makes sense for MC
eval_mc_variables = eval_data_variables + [
    "weight_spline", "weight_cv",
    "match_completeness_energy",
    "truth_nuEnergy", "truth_energyInside", "truth_electronInside",
    "truth_nuPdg", "truth_isCC", "truth_isFC", "truth_vtxInside",
    "truth_vtxX", "truth_vtxY", "truth_vtxZ",
    "truth_nuTime",
]

pf_eval_mc_variables = [
    "truth_NprimPio", "truth_NCDelta",
    "nuvtx_diff", "showervtx_diff",
    "reco_showerKE",
    "truth_pio_energy_1", "truth_pio_energy_2",
    "reco_nuvtxX", "reco_nuvtxY", "reco_nuvtxZ",
    "reco_showervtxX", "reco_showervtxY", "reco_showervtxZ",
    "truth_pdg", "truth_mother",
    "truth_startMomentum", "truth_startXYZT",
]

# also use these for dirt
# NOTE(review): the run1 dirt loader in this notebook reads pf_eval_mc_variables
# from T_PFeval instead — confirm which list is intended for dirt.
pf_eval_data_variables = [
    "reco_showerKE",
    "reco_nuvtxX", "reco_nuvtxY", "reco_nuvtxZ",
    "reco_showervtxX", "reco_showervtxY", "reco_showervtxZ",
]

In [7]:
# loading WC dirt files
#
# Reads the run1 dirt overlay checkout file, builds one dataframe with the
# BDT / eval / PFeval / KINE columns side by side, and sums the POT of the file.

f_dirt_run1 = uproot.open(wc_file_location + "checkout_prodgenie_bnb_dirt_overlay_run1_PF.root")["wcpselection"]
f_dirt_run1_bdt = f_dirt_run1["T_BDTvars"].pandas.df(bdt_vars, flatten=False)
# T_eval: only the data-style eval variables plus the match_isFC flag are read here
f_dirt_run1_eval = f_dirt_run1["T_eval"].pandas.df(eval_data_variables + ["match_isFC"], flatten=False)
# T_PFeval: the MC list is used, so the truth_* columns (truth_pdg, truth_mother,
# truth_startMomentum, truth_startXYZT) that are inspected in later cells are available
f_dirt_run1_pfeval = f_dirt_run1["T_PFeval"].pandas.df(pf_eval_mc_variables, flatten=False)
f_dirt_run1_kine = f_dirt_run1["T_KINEvars"].pandas.df(kine_scalar_vars + kine_vector_vars + ["kine_reco_Enu"], flatten=False)
f_dirt_run1_pot = f_dirt_run1["T_pot"].pandas.df("pot_tor875good", flatten=False)

# total POT: sum of pot_tor875good over all entries of T_pot
dirt_run1_POT = np.sum(f_dirt_run1_pot["pot_tor875good"].to_numpy())

# column-wise join of the per-tree dataframes
dirt_run1_df = pd.concat([f_dirt_run1_bdt, f_dirt_run1_eval, f_dirt_run1_pfeval, f_dirt_run1_kine], axis=1, sort=False)

# drop the file handle and the per-tree dataframes now that they are merged
del (
    f_dirt_run1,
    f_dirt_run1_bdt,
    f_dirt_run1_eval,
    f_dirt_run1_pfeval,
    f_dirt_run1_kine,
    f_dirt_run1_pot,
)

# Constant bookkeeping columns. A scalar is broadcast to every row by pandas,
# so there is no need to build a Python list with one element per event.
dirt_run1_df["isEXT"] = 0
dirt_run1_df["isDirt"] = 1
dirt_run1_df["WC_file"] = "dirt_run1"
dirt_run1_df["run_num"] = 1



In [8]:
# Look up run 7051 / subrun 68 / event 3443 in the glee dataframe and show its
# identifiers together with the mctruth_* vertex and energy columns.
glee_1g0p_CC1Pi0_df.query(
    "run_number == 7051 and subrun_number == 68 and event_number == 3443"
).loc[:, [
    "run_number", "subrun_number", "event_number",
    "mctruth_nu_vertex_x", "mctruth_nu_vertex_y", "mctruth_nu_vertex_z",
    "mctruth_nu_E",
]]

Unnamed: 0_level_0,run_number,subrun_number,event_number,mctruth_nu_vertex_x,mctruth_nu_vertex_y,mctruth_nu_vertex_z,mctruth_nu_E
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
19,7051,68,3443,23.482481,-113.791052,497.760532,2.758254


In [9]:
# Same run / subrun / event numbers, looked up in the WC dirt dataframe:
# show the per-particle truth_* columns.
dirt_run1_df.query(
    "run == 7051 and subrun == 68 and event == 3443"
).loc[:, [
    "run", "subrun", "event",
    "truth_pdg", "truth_mother",
    "truth_startMomentum", "truth_startXYZT",
]]

Unnamed: 0_level_0,run,subrun,event,truth_pdg,truth_mother,truth_startMomentum,truth_startXYZT
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
157466,7051,68,3443,"[13, 14, 1000170400]","[0, 580, 580]","[[0.34874234, 0.4686843, 0.6042773, 0.8471133]...","[[-165.10895, -211.68216, -303.98862, 4085.021..."


In [10]:
# Full (untruncated) truth_startXYZT arrays for run 7051 / subrun 68 / event 3443.
dirt_run1_df.query(
    "run == 7051 and subrun == 68 and event == 3443"
).loc[:, "truth_startXYZT"].to_numpy()

array([array([[-165.10895, -211.68216, -303.98862, 4085.021  ],
              [  25.01392,  -36.90118,   34.7536 , 4305.271  ],
              [  25.01392,  -36.90118,   34.7536 , 4305.271  ]], dtype=float32)],
      dtype=object)

In [11]:
# Look up run 7019 / subrun 465 / event 23292 in the glee dataframe and show its
# identifiers together with the mctruth_* vertex and energy columns.
glee_1g0p_CC1Pi0_df.query(
    "run_number == 7019 and subrun_number == 465 and event_number == 23292"
).loc[:, [
    "run_number", "subrun_number", "event_number",
    "mctruth_nu_vertex_x", "mctruth_nu_vertex_y", "mctruth_nu_vertex_z",
    "mctruth_nu_E",
]]

Unnamed: 0_level_0,run_number,subrun_number,event_number,mctruth_nu_vertex_x,mctruth_nu_vertex_y,mctruth_nu_vertex_z,mctruth_nu_E
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
47,7019,465,23292,171.075836,60.16144,810.685493,1.216065


In [12]:
# Same run / subrun / event numbers, looked up in the WC dirt dataframe:
# show the per-particle truth_* columns.
dirt_run1_df.query(
    "run == 7019 and subrun == 465 and event == 23292"
).loc[:, [
    "run", "subrun", "event",
    "truth_pdg", "truth_mother",
    "truth_startMomentum", "truth_startXYZT",
]]

Unnamed: 0_level_0,run,subrun,event,truth_pdg,truth_mother,truth_startMomentum,truth_startXYZT
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
36263,7019,465,23292,"[2112, 1000180400, 2112, 2112, 2112, 100018038...","[376, 379, 379, 379, 379, 379, 932, 932, 931, ...","[[-0.12830096, -0.022758622, -0.35440627, 1.01...","[[35.097126, -117.77977, 1347.2516, 3989.7185]..."


In [14]:
# Full (untruncated) truth_pdg array for run 7019 / subrun 465 / event 23292.
dirt_run1_df.query(
    "run == 7019 and subrun == 465 and event == 23292"
).loc[:, "truth_pdg"].to_numpy()

array([array([      2112, 1000180400,       2112,       2112,       2112,
              1000180380,       2112, 1000260560, 1000180400,       2112,
              1000180400,       2112, 1000180400,       2112,       2112,
                    2112, 1000180380,       2112, 1000180400,       2112,
              1000180400], dtype=int32)                                  ],
      dtype=object)

In [13]:
# Full (untruncated) truth_startXYZT arrays for run 7019 / subrun 465 / event 23292.
dirt_run1_df.query(
    "run == 7019 and subrun == 465 and event == 23292"
).loc[:, "truth_startXYZT"].to_numpy()

array([array([[ 3.50971260e+01, -1.17779770e+02,  1.34725159e+03,
                3.98971851e+03],
              [-2.97089062e+01,  2.98197508e+00,  1.08183765e+03,
                4.01777417e+03],
              [-9.50935960e-01,  9.68708649e+01,  9.90210449e+02,
                4.03001855e+03],
              [-9.50935960e-01,  9.68708649e+01,  9.90210449e+02,
                4.03001855e+03],
              [-9.50935960e-01,  9.68708649e+01,  9.90210449e+02,
                4.03001855e+03],
              [-9.50935960e-01,  9.68708649e+01,  9.90210449e+02,
                4.03001855e+03],
              [-3.76471519e+01,  9.31039352e+01,  8.68757202e+02,
                4.08463428e+03],
              [-3.76471519e+01,  9.31039352e+01,  8.68757202e+02,
                4.08463428e+03],
              [-1.33001506e-01,  9.74020386e+01,  9.93436279e+02,
                4.03132349e+03],
              [-9.56099701e+00,  1.05586006e+02,  1.00310828e+03,
                4.03777686e+03],
          