# Importing Libraries

In [1]:
import uproot
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tqdm.notebook as tqdm


# Loading ROOT Files Into Pandas DataFrames

In [2]:
# Directory that holds the WC ntuple files -- change this to wherever your copy lives.
wc_file_location = "/Users/leehagaman/data/processed_checkout_rootfiles/"

# Open one nu_overlay file so that we can look at what is stored inside it.
f_nu_overlay = uproot.open(f"{wc_file_location}checkout_prodgenie_bnb_nu_overlay_run1_PF.root")

# Last expression of the cell: the keys found in the file are shown as the cell output.
f_nu_overlay.keys()


['wcpselection;1',
 'wcpselection/T_PFeval;69',
 'wcpselection/T_PFeval;68',
 'wcpselection/T_BDTvars;13',
 'wcpselection/T_BDTvars;12',
 'wcpselection/T_eval;2',
 'wcpselection/T_eval;1',
 'wcpselection/T_pot;1',
 'wcpselection/T_KINEvars;1']

### We see that there are several TTrees inside the wcpselection TDirectory. Now let's print the variables in each TTree.

In [3]:
# TTree names (with the ";<cycle>" suffix removed) whose variables were already printed
already_printed_keys = []

for cycle_name, tree in f_nu_overlay["wcpselection"].items():
    tree_name = cycle_name.split(";")[0]  # e.g. "T_eval;2" -> "T_eval"

    # the same TTree can be listed more than once (one entry per cycle); print it only once
    if tree_name in already_printed_keys:
        continue

    print("\nPrinting variables in TTree: ", tree_name)
    print(tree.keys())
    already_printed_keys.append(tree_name)


Printing variables in TTree:  T_PFeval
['run', 'subrun', 'event', 'neutrino_type', 'reco_nuvtxX', 'reco_nuvtxY', 'reco_nuvtxZ', 'reco_showervtxX', 'reco_showervtxY', 'reco_showervtxZ', 'reco_showerKE', 'reco_muonvtxX', 'reco_muonvtxY', 'reco_muonvtxZ', 'reco_muonMomentum', 'nuvtx_diff', 'showervtx_diff', 'muonvtx_diff', 'truth_corr_nuvtxX', 'truth_corr_nuvtxY', 'truth_corr_nuvtxZ', 'truth_corr_showervtxX', 'truth_corr_showervtxY', 'truth_corr_showervtxZ', 'truth_showerKE', 'truth_corr_muonvtxX', 'truth_corr_muonvtxY', 'truth_corr_muonvtxZ', 'truth_muonvtxX', 'truth_muonvtxY', 'truth_muonvtxZ', 'truth_muonendX', 'truth_muonendY', 'truth_muonendZ', 'truth_muonMomentum', 'truth_nuEnergy', 'truth_energyInside', 'truth_electronInside', 'truth_nuPdg', 'truth_isCC', 'truth_vtxX', 'truth_vtxY', 'truth_vtxZ', 'truth_nuTime', 'truth_nuIntType', 'truth_NCDelta', 'truth_NprimPio', 'truth_pio_energy_1', 'truth_pio_energy_2', 'truth_pio_angle', 'reco_protonMomentum', 'reco_showerMomentum', 'reco_Np

### That's a lot of variables! Let's only load a few of the most relevant ones for now.

## Choosing Variables To Load

In [4]:
# ---------------------------------------------------------------------------
# Variables involved with BDT training
# ---------------------------------------------------------------------------
T_bdt_vars = [
    "nue_score",   # BDT score for nue selection, used for the WC inclusive nueCC analysis
    "numu_score",  # BDT score for numu selection, used for the WC inclusive numuCC selections
]

# ---------------------------------------------------------------------------
# Variables involved with low level reconstruction and truth information
# ---------------------------------------------------------------------------
T_eval_vars = [
    # event identification
    "run",     # run number
    "subrun",  # subrun number
    "event",   # event number

    "match_isFC",                 # boolean: the reconstructed cluster is fully contained (FC)
    "truth_nuEnergy",             # true neutrino energy (MeV)
    "truth_nuPdg",                # true neutrino pdg code
    "truth_isCC",                 # boolean: the true interaction type is charged current
    "match_completeness_energy",  # true energy deposited in the clusters that are 3D-matched
                                  # with the reconstructed neutrino clusters (MeV)
    "truth_energyInside",         # true energy deposited in the TPC Fiducial Volume (MeV)
    "truth_vtxInside",            # boolean: the true neutrino vertex is inside the TPC Fiducial Volume

    # true neutrino vertex position (cm)
    "truth_vtxX",
    "truth_vtxY",
    "truth_vtxZ",

    # event weights
    "weight_cv",      # untuned GENIE event weight
    "weight_spline",  # additional MicroBooNE Tune weight
]

# Same tree as above, but for data files: no truth information is loaded.
T_eval_data_vars = ["match_isFC"]

# ---------------------------------------------------------------------------
# Variables involved with kinematic reconstruction
# ---------------------------------------------------------------------------
T_kine_vars = ["kine_reco_Enu"]  # reconstructed neutrino energy (MeV)

# ---------------------------------------------------------------------------
# Variables involved with individual particles
# ---------------------------------------------------------------------------
T_pf_vars = [
    "truth_NprimPio",
    "truth_NCDelta",
    "reco_nuvtxX",
    "reco_nuvtxY",
    "reco_nuvtxZ",
    # reconstructed muon momentum 4-vector (p_x, p_y, p_z, p_t),
    # in (GeV/c, GeV/c, GeV/c, GeV)
    "reco_muonMomentum",

    # per-particle variables for the individual true particles
    "truth_Ntrack",
    "truth_id",
    "truth_pdg",
    "truth_mother",
    "truth_startMomentum",
    "truth_startXYZT",

    # per-particle variables for the individual reco particles
    "reco_Ntrack",
    "reco_id",
    "reco_pdg",
    "reco_mother",
    "reco_startMomentum",
    "reco_startXYZT",
]

# Same tree as above, but for data files: no truth information is loaded.
# This is T_pf_vars with every "truth_"-prefixed name dropped, order preserved.
T_pf_data_vars = [name for name in T_pf_vars if not name.startswith("truth_")]


### Loading nu_overlay File

In [5]:
# open the nu_overlay file
f_nu_overlay = uproot.open(wc_file_location + "checkout_prodgenie_bnb_nu_overlay_run1_PF.root")

# Read the chosen variables of each TTree into a dataframe and join the four dataframes
# side by side (axis=1) into one. The per-tree dataframes only exist inside this
# expression, so no temporary dataframes are left behind holding memory afterwards.
nu_overlay_df = pd.concat(
    [
        f_nu_overlay["wcpselection"][tree_name].arrays(branch_names, library="pd")
        for tree_name, branch_names in (
            ("T_BDTvars", T_bdt_vars),
            ("T_eval", T_eval_vars),
            ("T_KINEvars", T_kine_vars),
            ("T_PFeval", T_pf_vars),
        )
    ],
    axis=1,
)

# last expression of the cell: display the combined dataframe
nu_overlay_df

Unnamed: 0,nue_score,numu_score,run,subrun,event,match_isFC,truth_nuEnergy,truth_nuPdg,truth_isCC,match_completeness_energy,...,truth_pdg,truth_mother,truth_startMomentum,truth_startXYZT,reco_Ntrack,reco_id,reco_pdg,reco_mother,reco_startMomentum,reco_startXYZT
0,-7.811869,2.852163,6219,97,4867,False,1489.831421,14,True,384.035736,...,"[13, 2212, 2212, 2212, 2212, 2212, 2112, 2212,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 1...","[[0.2546793818473816, -0.0954013466835022, 0.2...","[[87.71573638916016, -109.88015747070312, 951....",22,"[65001, 65002, 65007, 65004, 66008, 67009, 680...","[13, 2212, 11, 11, 11, 2212, 11, 211, 211, 211...","[0, 0, 0, 0, 66022, 67023, 68024, 69025, 70026...","[[0.10423357039690018, -0.05407045781612396, 0...","[[87.59615325927734, -109.97093200683594, 951...."
1,-15.000000,-3.340668,6219,97,4869,False,728.165955,14,True,440.520691,...,"[13, 2212, 211, 2212, 2212, 2112, 2112, 2112, ...","[0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 12, 9,...","[[-0.13679924607276917, -0.17215533554553986, ...","[[56.76708984375, -93.32466888427734, 143.5903...",0,[],[],[],[],[]
2,-15.000000,-3.821578,6219,97,4896,True,710.893616,14,True,101.743752,...,"[13, 2212, 211, 1000180400, 14, -13, -14, 12, ...","[0, 0, 0, 3, 3, 3, 26, 26, 26, 2, 2, 2, 1, 1, ...","[[-0.08121907711029053, 0.06210789084434509, 0...","[[3.473398208618164, 43.196075439453125, 649.0...",0,[],[],[],[],[]
3,-15.000000,-3.821578,6219,97,4899,True,1149.019531,14,True,0.000000,...,"[13, 2112, 2212, 2112, 2212, 1000170390, 2212,...","[0, 0, 0, 2, 2, 2, 13, 13, 1, 1, 1, 12, 12, 12...","[[-0.43865516781806946, -0.02003403939306736, ...","[[200.43023681640625, -148.55886840820312, 371...",0,[],[],[],[],[]
4,-15.000000,-3.821578,6219,99,4967,True,889.859497,14,False,2.173244,...,"[14, 2112, 211, 2112, 1000180400, 2112, 2112, ...","[0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 3, 3, 31, 31...","[[-0.11623919010162354, 0.23554888367652893, 0...","[[213.08460998535156, 116.43939971923828, 723....",0,[],[],[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
581936,-15.000000,-3.340668,6921,127,6379,False,797.798584,14,False,0.000000,...,"[14, 2212, 2212, 2212, 2212, 2212, 2212, 2212,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0.41942548751831055, 0.11730318516492844, 0....","[[127.62874603271484, 153.9810028076172, 255.5...",0,[],[],[],[],[]
581937,-15.000000,-3.340668,6921,127,6383,False,2494.067383,14,True,268.215149,...,"[13, 2212, 2112, 11, 11, 11, 11]","[0, 0, 0, 1, 1, 1, 1]","[[0.3938135504722595, 0.48017796874046326, 2.2...","[[232.5511932373047, 55.8696403503418, -36.529...",0,[],[],[],[],[]
581938,-15.000000,-3.821578,6921,127,6384,True,235.531815,14,False,0.000000,...,"[14, 2112, 1000180400, 2112, 1000180400, 2112,...","[0, 0, 2, 2, 2, 4, 4, 11, 11, 16]","[[0.013871511444449425, -0.12251489609479904, ...","[[183.54531860351562, -38.23894119262695, 56.4...",0,[],[],[],[],[]
581939,-15.000000,-3.340668,6921,127,6385,False,796.507874,14,True,0.000000,...,"[13, 2212, 2112, 2112, 2212, 2112, 2112, 10001...","[0, 0, 2, 2, 2, 2, 2, 2, 21, 21, 144, 144, 20,...","[[0.3590925335884094, -0.03354886174201965, 0....","[[187.59303283691406, -88.04727935791016, 1076...",0,[],[],[],[],[]


### Loading Data, Dirt, and EXT files

# Making A Histogram

# Plotting An Efficiency

# Particle-level Information