In [26]:
import uproot
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import nue as nue
from tqdm import tqdm
import importlib
____ = importlib.reload(nue)

# Column sets used as merge/join keys further down the notebook.
nu_idx_set  = ["ntuple", "entry", "nu_index"]        # key for merging neutrino info
slc_idx_set = ["ntuple", "entry", "rec.slc__index"]
evt_idx_set = ["run", "subrun", "event"]             # key for matching the ROOT trees to the header table

# Input files: a ROOT file holding the trees read with uproot, and an
# HDF store read with pandas.read_hdf.
hist_path = "/sbnd/data/users/lynnt/v09_75_03_03/intrnue_hists.root"
file      = "/sbnd/data/users/lynnt/v09_75_03_03/intrnue.df"

# "<file>:<object path>" strings handed to uproot.open.
razz_path = f"{hist_path}:ncpizeroana/events"
opt0_path = f"{hist_path}:opt0finder/flash_match_tree"

In [27]:
# Branches to read from the flash-match tree (opt0_path).
opt0_col = [
    "run", "subrun", "event",
    "tpc",
    "pfpid",
    "score",
    "hypo_pe",
    "flash_pe",
]

# Branches to read from the razzled tree (razz_path): per-PFP scores and PDG.
razz_pfp_col = [
    "run", "subrun", "event",
    "slc_pfp_id",
    "slc_pfp_razzled_electron_score",
    "slc_pfp_razzled_muon_score",
    "slc_pfp_razzled_pdg",
    "slc_pfp_razzled_photon_score",
    "slc_pfp_razzled_pion_score",
    "slc_pfp_razzled_proton_score",
]

# Branches to read from the razzled tree (razz_path): per-slc counts.
razz_slc_col = [
    "run", "subrun", "event",
    "slc_primary_pfp_id",
    "slc_n_pfps",
    "slc_n_razzled_electrons",
    "slc_n_razzled_muons",
    "slc_n_razzled_photons",
    "slc_n_razzled_pions",
    "slc_n_razzled_protons",
]

In [29]:
# Read the flash-match tree into pandas; the `with` block closes the
# underlying file handle as soon as the arrays have been read.
with uproot.open(opt0_path) as opt0_tree:
    opt0_df = opt0_tree.arrays(opt0_col, library="pd")

# Open the razzled tree once (instead of once per column set) and close it
# when done. The two column sets are still read with separate .arrays calls,
# exactly as before, so each resulting frame keeps its original layout.
with uproot.open(razz_path, num_workers=64) as razz_tree:
    razz_pfp_0 = razz_tree.arrays(razz_pfp_col, library="pd")
    razz_slc_0 = razz_tree.arrays(razz_slc_col, library="pd")

In [4]:
# Stage 1: explode every column once (keyed on the event id), then turn each
# remaining per-row container into a numpy array so it can be masked below.
razz_pfp = (
    razz_pfp_0
    .set_index(["run", "subrun", "event"])
    .apply(pd.Series.explode)
    .applymap(lambda vec: np.array(vec))
)

# Sanity-check column: number of entries with a positive razzled PDG per row.
razz_pfp["n_razzled"] = razz_pfp["slc_pfp_razzled_pdg"].apply(
    lambda pdg: np.count_nonzero(pdg > 0)
)

# Running 0-based counter of rows within each (run, subrun, event); used
# later as the slc index when joining against razz_slc.
razz_pfp = razz_pfp.reset_index()
razz_pfp["slc_idx"] = razz_pfp.groupby(["run", "subrun", "event"]).cumcount()

# Stage 2: explode the per-row arrays, keeping the event id, slc index and
# n_razzled fixed for every exploded row.
razz_pfp = (
    razz_pfp
    .set_index(["run", "subrun", "event", "slc_idx", "n_razzled"])
    .apply(pd.Series.explode)
    .reset_index()
)

In [5]:
# Work on a copy so the frame read from disk (razz_slc_0) stays untouched.
razz_slc = razz_slc_0.copy()

# Running 0-based counter of rows within each (run, subrun, event); this is
# the slc index used to join against razz_pfp.
razz_slc["slc_idx"] = razz_slc.groupby(["run", "subrun", "event"]).cumcount()

# Collapse the per-species razzled counts into one total (kept for a later
# sanity check), then drop the individual count columns.
razzled_count_cols = [
    "slc_n_razzled_electrons",
    "slc_n_razzled_muons",
    "slc_n_razzled_photons",
    "slc_n_razzled_pions",
    "slc_n_razzled_protons",
]
razz_slc["n_razzled"] = sum(razz_slc[col] for col in razzled_count_cols)
razz_slc = razz_slc.drop(columns=razzled_count_cols)



In [6]:
# Index both razzled tables by event id + slc index, then attach the PFP rows
# to their slc row. Columns present in both tables (e.g. n_razzled) come out
# with "_slc" / "_pfp" suffixes.
razz_join_keys = ["run", "subrun", "event", "slc_idx"]
razz_slc_idx = razz_slc.set_index(razz_join_keys).sort_index()
razz_pfp_idx = razz_pfp.set_index(razz_join_keys).sort_index()
razz_df = razz_slc_idx.join(razz_pfp_idx, how="left", lsuffix="_slc", rsuffix="_pfp")
razz_df = razz_df.reset_index()

In [7]:
# Load the tables stored in the HDF file, one per key.
hdr_df_0 = pd.read_hdf(file,key="hdr")
nuu_df_0 = pd.read_hdf(file,key="mcnu")
nuprim_df_0 = pd.read_hdf(file,key="mcnuprim")
slctrk_df_0 = pd.read_hdf(file,key="slctrk")
slcshw_df_0 = pd.read_hdf(file,key="slcshw")

# get run/subrun/event info correlated with entry/ntuple
hdr_df = nue.flatten_df(hdr_df_0)[["ntuple","entry","rec_hdr_run","rec_hdr_subrun","rec_hdr_evt","rec_hdr_pot"]].rename(columns={"rec_hdr_subrun":"subrun","rec_hdr_run":"run","rec_hdr_evt":"event"})
# unique (run, subrun, event) triples seen in the opt0 tree, restricted to
# those that also appear in the header table
sub_opt0_df = opt0_df[evt_idx_set].drop_duplicates()
sub_opt0_df = hdr_df.merge(sub_opt0_df,on=evt_idx_set,how="inner")
# get opt0 information correlated with entry/ntuple
opt0_hdr_df = sub_opt0_df[["ntuple","entry","run","subrun","event"]].drop_duplicates().merge(opt0_df,how="left",on= evt_idx_set)
# from here on opt0 rows are keyed by (ntuple, entry); "pfpid" is renamed so
# it can be joined against the "slc_self" column later
opt0_hdr_df = opt0_hdr_df.drop(columns=evt_idx_set)
opt0_hdr_df = opt0_hdr_df.rename(columns={"pfpid":"slc_self"})

# make dataframes
nuprim_df = nuprim_df_0.copy()

# Signal definition: CC interaction, |nu pdg| == 12, a primary with
# |pdg| == 11 whose startE - endE exceeds 0.2, and passing
# nue.maskTrueVtxFv. Both masks are evaluated before
# nue.defineBackground is applied to the frame.
whereFV = nue.maskTrueVtxFv(nuprim_df)
whereSig = ((nuprim_df.iscc==1) & (abs(nuprim_df.pdg)==12) & (abs(nuprim_df.prim.pdg)==11) & (nuprim_df.prim.startE-nuprim_df.prim.endE > 0.2) )
nuprim_df = nue.defineBackground(nuprim_df)
# signal == 0 marks rows passing both masks; every other row keeps the value
# assigned by nue.defineBackground
nuprim_df["signal"] = np.where(whereFV & whereSig,0,nuprim_df["signal"])

# keep only the rows whose fourth index level equals 0, then flatten
nu_df = nuprim_df.loc[:,:,:,0]
nu_df = nue.flatten_df(nu_df)
nu_df["nu_index"] = nu_df["rec.mc.nu__index"]

# get the dataframe that contains the counts of each PDG per event 
pdg_counts = nue.getPDGCounts(nuprim_df)
# nu_df = nu_df[nu_df.signal==0]
# merge the pdg counts into the full nu_df 
nu_df = nu_df.merge(pdg_counts,how="left",on=["ntuple","entry","rec.mc.nu__index"])
# get slcpfp dataframe 
slcpfp_df = nue.getPFP(slcshw_df_0,slctrk_df_0)

# merge slcpfp with neutrino events 
slcpfp_nu_df = slcpfp_df.merge(nu_df,on=nu_idx_set,how="left")
# Rows with slc_tmatch_idx == -999 get signal = 5 (presumably "no truth
# match" -- TODO confirm). The mask is taken from the merged frame itself so
# it is row-aligned with the column being overwritten by construction; the
# pre-merge slcpfp_df only lines up if the merge keeps a one-to-one row
# correspondence.
slcpfp_nu_df["signal"] = np.where(slcpfp_nu_df["slc_tmatch_idx"]==-999,5,slcpfp_nu_df["signal"])
# slcpfp_nu_df = slcpfp_nu_df.query("signal==0")
# fixes for working with updated bdt trees 
slcpfp_nu_df = slcpfp_nu_df.drop(columns=["pfp_shw_razzle_electronScore","pfp_trk_dazzle_muonScore"])
slcpfp_nu_df

Unnamed: 0,ntuple,entry,rec.slc__index,rec.slc.reco.pfp__index,slc_is_clear_cosmic,slc_vertex_x,slc_vertex_y,slc_vertex_z,slc_self,slc_tmatch_eff,...,total_prim_depE,nelec,ngamma,npi0,npi,nneu,nproton,nother,prim_exit_count,prim_cont
0,0,0,0,0,0,-35.413586,-188.217438,4.207452,53,,...,,,,,,,,,,
1,0,0,1,0,0,-159.063248,-63.412956,402.538757,54,,...,,,,,,,,,,
2,0,0,2,0,0,-52.705650,-117.144081,236.728653,52,0.834009,...,1.011247,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,True
3,0,0,2,1,0,-52.705650,-117.144081,236.728653,52,0.834009,...,1.011247,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,True
4,0,0,3,1,1,-135.301468,200.285583,215.586090,1,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3364725,0,86299,6,8,1,8.319620,-42.179394,507.402466,5,,...,,,,,,,,,,
3364726,0,86299,7,1,1,172.386826,199.635101,249.265701,7,,...,,,,,,,,,,
3364727,0,86299,7,2,1,172.386826,199.635101,249.265701,7,,...,,,,,,,,,,
3364728,0,86299,8,1,1,12.918618,160.231461,262.498993,11,,...,,,,,,,,,,


In [19]:
# Keep only rows that are not flagged as clear cosmics.
noncos_df = slcpfp_nu_df[slcpfp_nu_df["slc_is_clear_cosmic"] == 0]

# Step 1: bring run/subrun/event (and rec_hdr_pot) in from the header table,
# matched on (ntuple, entry).
ntuple_entry_keys = ["ntuple", "entry"]
noncos_idx = noncos_df.set_index(ntuple_entry_keys).sort_index()
hdr_idx = hdr_df.set_index(ntuple_entry_keys).sort_index()
noncos_df = noncos_idx.join(hdr_idx).reset_index()

# Step 2: bring the opt0 columns in, matched on (ntuple, entry, slc_self),
# and restore the slc ordering afterwards.
opt0_join_keys = ["ntuple", "entry", "slc_self"]
noncos_self_idx = noncos_df.set_index(opt0_join_keys).sort_index()
opt_self_idx = opt0_hdr_df.set_index(opt0_join_keys).sort_index()
noncos_df = noncos_self_idx.join(opt_self_idx, how="left").reset_index()
noncos_df = noncos_df.sort_values(["ntuple", "entry", "rec.slc__index"])

# Step 3: bring the razzled columns in, matched on the event id plus the
# slc and pfp ids. The razzled id columns are renamed to the names used here.
# NOTE(review): columns produced by Series.explode are object dtype; confirm
# that the "pfp_id" key has compatible dtypes on both sides of this join.
razz_join_cols = ["run", "subrun", "event", "slc_self", "pfp_id"]
noncos_evt_idx = noncos_df.set_index(razz_join_cols).sort_index()
razz_evt_idx = (
    razz_df
    .rename(columns={"slc_primary_pfp_id": "slc_self", "slc_pfp_id": "pfp_id"})
    .set_index(razz_join_cols)
    .sort_index()
)
noncos_df = noncos_evt_idx.join(razz_evt_idx, how="left").reset_index()