In [None]:
import uproot
print("uproot version: ", uproot.__version__)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm
import pickle

import sys
import os

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.file_locations import data_files_location
from src.ntuple_variables.variables import pandora_training_vars, glee_training_vars


# Looking at input SURPRISE ROOT Files

In [None]:
# Open the same beam-on ntuple from the main data directory and from its
# older_downloads/ sub-directory, then report how many T_eval entries each has.
beam_on_name = "MCC9.10_Run4b_v10_04_07_11_BNB_beam_on_surprise_reco2_hist.root"
new_data_file = uproot.open(data_files_location + "/" + beam_on_name)
old_data_file = uproot.open(data_files_location + "/older_downloads/" + beam_on_name)

for label, data_file in (("new", new_data_file), ("old", old_data_file)):
    n_entries = data_file["wcpselection"]["T_eval"].num_entries
    print(f"{label} data file has {n_entries} events")


In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Open the NCpi0 overlay ntuple and bind it to `f`, the handle the following
# cells read trees from. The commented-out line is an alternative (beam-off) file.
#f = uproot.open(data_files_location + "/MCC9.10_Run4b_v10_04_07_09_Run4b_BNB_beam_off_surprise_reco2_hist.root")
f = uproot.open(data_files_location + "/checkout_MCC9.10_Run4c4d5_v10_04_07_13_BNB_NCpi0_overlay_surprise_reco2_hist_4c.root")

In [None]:
# Unique branch names of singlephotonana/vertex_tree, in alphabetical order.
glee_tree = f["singlephotonana"]["vertex_tree"]
possible_glee_vars = sorted({name for name, _ in glee_tree.items()})

# Only the first 10 entries are loaded; the loop below inspects entry 0 of each column.
possible_glee_vars_df = pd.DataFrame(glee_tree.arrays(possible_glee_vars, library="pd", entry_stop=10))

# Column names containing any of these substrings are left out of the printout.
skipped_substrings = ("blip", "CRT", "truth", "true", "sim", "isolation")

for col in possible_glee_vars_df.columns:
    if any(token in col for token in skipped_substrings):
        continue
    first_value = possible_glee_vars_df[col][0]
    # Keep only columns whose first entry has "awkward" in its type name.
    if "awkward" in str(type(first_value)):
        print(col, first_value, type(first_value))


In [None]:
# Isolation branches of singlephotonana/vertex_tree whose per-event values are printed below.
glee_vector_vars = [
    "isolation_min_dist_trk_shr",
    "isolation_min_dist_trk_unassoc",
    "isolation_nearest_shr_hit_to_trk_time",
    "isolation_nearest_shr_hit_to_trk_wire",
    "isolation_nearest_unassoc_hit_to_trk_time",
    "isolation_nearest_unassoc_hit_to_trk_wire",
    "isolation_num_shr_hits_win_10cm_trk",
    "isolation_num_shr_hits_win_1cm_trk",
    "isolation_num_shr_hits_win_2cm_trk",
    "isolation_num_shr_hits_win_5cm_trk",
    "isolation_num_unassoc_hits_win_10cm_trk",
    "isolation_num_unassoc_hits_win_1cm_trk",
    "isolation_num_unassoc_hits_win_2cm_trk",
    "isolation_num_unassoc_hits_win_5cm_trk",
]

# Number of leading entries to print.
num_events_to_print = 10

# Read every branch once, and only the entries that get printed. Calling
# .array() inside the event loop would re-read the complete branch from the
# file for each (event, variable) pair.
glee_vertex_tree = f["singlephotonana"]["vertex_tree"]
glee_vector_vals = {
    var: glee_vertex_tree[var].array(entry_stop=num_events_to_print)
    for var in glee_vector_vars
}

for event_i in range(num_events_to_print):
    print("------------ new event ------------")
    for var in glee_vector_vars:
        vals = glee_vector_vals[var][event_i]
        print(vals, end=", ")
    print()


In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Branches of nuselection/NeutrinoSelectionFilter whose per-event lengths are compared below.
pandora_vector_vars = [
    "pfp_generation_v",
    "pfng2hip_r1cm",
    "pfng2hip_r3cm",
    "pfng2hip_r5cm",
    "pfng2hip_r10cm",
    "pfng2semlabel",
    "pfng2shrfrac",
    "pfng2mipfrac",
    "pfng2hipfrac",
    "pfng2mclfrac",
    "pfng2dfsfrac",
    "pfng2bkgfrac",
    "pfng2shravrg",
    "pfng2mipavrg",
    "pfng2hipavrg",
    "pfng2mclavrg",
    "pfng2dfsavrg",
    "pfng2bkgavrg",
    "trk_pfp_id_v",
    "trk_phi_v",
    "trk_pid_chika_u_v",
    "trk_pid_chika_v",
    "trk_pid_chika_v_v",
    "trk_pid_chimu_u_v",
    "trk_pid_chimu_v",
    "trk_pid_chimu_v_v",
    "trk_pid_chipi_u_v",
    "trk_pid_chipi_v",
    "trk_pid_chipi_v_v",
    "trk_pid_chipr_u_v",
    "trk_pid_chipr_v",
    "trk_pid_chipr_v_v",
    "trk_pida_u_v",
    "trk_pida_v",
    "trk_pida_v_v",
    # "trkshrscore_v", # seems to be usually an empty list, and then occasionally [0], with no nonzero values
    "trk_llr_pid_score_v",
    "trk_len_v",
    "trk_distance_v",
    "trk_score_v",
    "trk_energy_muon_v",
    "trk_energy_proton_v",
]

# Number of leading entries to print.
num_events_to_print = 10

# Read every branch once, and only the entries that get printed. Calling
# .array() inside the event loop would re-read the complete branch from the
# file for each (event, variable) pair.
pandora_tree = f["nuselection"]["NeutrinoSelectionFilter"]
pandora_vector_vals = {
    pfp_var: pandora_tree[pfp_var].array(entry_stop=num_events_to_print)
    for pfp_var in pandora_vector_vars
}

# One row per event: the length of each vector branch, in list order.
for event_i in range(num_events_to_print):
    print("------------ new event ------------")
    for pfp_var in pandora_vector_vars:
        print(len(pandora_vector_vals[pfp_var][event_i]), end=", ")
    print()


In [None]:
# Unique branch names of nuselection/NeutrinoSelectionFilter, in alphabetical order.
pandora_tree = f["nuselection"]["NeutrinoSelectionFilter"]
possible_pandora_vars = sorted({name for name, _ in pandora_tree.items()})

# Only the first 10 entries are loaded; the loop below inspects entry 0 of each column.
possible_pandora_vars_df = pd.DataFrame(pandora_tree.arrays(possible_pandora_vars, library="pd", entry_stop=10))

# Column names containing any of these substrings are left out of the printout.
# ("CosmicDir" / "CylFrac" were also considered for exclusion: no info in
# overleaf, not obvious what they mean.)
excluded_substrings = ("blip", "truth", "true", "sim")

for col in possible_pandora_vars_df.columns:
    if any(token in col for token in excluded_substrings):
        continue
    first_value = possible_pandora_vars_df[col][0]
    # Print only columns whose first entry does NOT have "awkward" in its type name.
    if "awkward" not in str(type(first_value)):
        print(col, first_value, type(first_value))



In [None]:
# Print every column of possible_pandora_vars_df whose first entry has "awkward"
# in its type name, skipping names that contain "blip", "truth" or "true".
for col in possible_pandora_vars_df.columns:
    is_excluded = any(token in col for token in ("blip", "truth", "true"))
    if is_excluded:
        continue
    first_value = possible_pandora_vars_df[col][0]
    if "awkward" in str(type(first_value)):
        print(col, first_value, type(first_value))



In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Display the (name, branch) pairs of nuselection/NeutrinoSelectionFilter in the open file `f`.
f["nuselection"]["NeutrinoSelectionFilter"].items()

In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# (heading, path relative to data_files_location) for each file to inspect.
lantern_check_files = [
    ("newest del1g file", "/UNUSED_newer_one_test_del1g_ntuple_file.root"),
    ("new del1g file", "/UNUSED_one_test_del1g_ntuple_file.root"),
    ("old del1g file", "/UNUSED_delete_one_gamma_run45_1k.root"),
    ("nue overlay file", "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_intrinsic_nue_overlay_surprise_reco2_hist_4a.root"),
]

# For every file, print the top-level keys whose name contains "lantern".
# `f` is rebound on each pass, so it ends up pointing at the last file in the list.
for heading, relative_path in lantern_check_files:
    print(f"\n{heading}:")
    f = uproot.open(data_files_location + relative_path)
    for key, _ in f.items():
        if "lantern" in key:
            print(key)

In [None]:
# Print the name of every object stored in the intrinsic-nue overlay file.
nue_overlay_path = data_files_location + "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_intrinsic_nue_overlay_surprise_reco2_hist_4a.root"
f = uproot.open(nue_overlay_path)
for key, _ in f.items():
    print(key)


In [None]:
# Rebind `f` to the del1g test ntuple and display its (name, object) pairs.
f = uproot.open(data_files_location + "/UNUSED_one_test_del1g_ntuple_file.root")
f.items()

In [None]:
# Print the evtTimeNS branch (as a NumPy array) for the del1g and then the iso1g
# test ntuple. `f` is left bound to the iso1g file afterwards.
for ntuple_name in (
    "/UNUSED_one_test_del1g_ntuple_file.root",
    "/UNUSED_one_test_iso1g_ntuple_file.root",
):
    f = uproot.open(data_files_location + ntuple_name)
    evt_time_ns = f["wcpselection"]["T_PFeval"]["evtTimeNS"].array(library="np")
    print(evt_time_ns)

In [None]:
# For every ROOT file directly inside data_files_location, report whether any
# branch of the three wcpselection trees mentions "WCPMTInfo", and how many
# entries T_eval holds.
wc_tree_names = ("T_PFeval", "T_eval", "T_BDTvars")

# sorted() gives a stable, reproducible listing order (os.listdir's order is arbitrary).
for filename in sorted(os.listdir(data_files_location)):
    file_path = os.path.join(data_files_location, filename)
    # os.listdir also returns sub-directories (e.g. older_downloads/) and any
    # non-ROOT files; uproot.open would fail on those, so skip them.
    if not (os.path.isfile(file_path) and filename.endswith(".root")):
        continue

    f = uproot.open(file_path)
    wc_dir = f["wcpselection"]

    # Gather the branch names of all three trees first, so that a missing tree
    # still raises instead of being hidden by short-circuiting.
    wc_varnames = [
        varname
        for tree_name in wc_tree_names
        for varname, _ in wc_dir[tree_name].items()
    ]
    contains_WCPMTInfo = any("WCPMTInfo" in varname for varname in wc_varnames)
    num_events = wc_dir["T_eval"].num_entries

    # The variable names are part of the output because of the `{name=}` f-string form.
    print(f'{filename.ljust(100)}', f'{contains_WCPMTInfo=}', f'{num_events=}')


In [None]:
# Rebind `f` to the Run4b BNB nu overlay ntuple and display the (name, branch)
# pairs of its wcpselection/T_eval tree.
f = uproot.open(data_files_location + "/MCC9.10_Run4b_v10_04_07_09_BNB_nu_overlay_surprise_reco2_hist.root")

f["wcpselection"]["T_eval"].items()


In [None]:
# Build one dataframe combining wcpselection truth branches with singlephotonana
# (glee) truth branches, then compare the events flagged truth_NCDelta == 1 with
# truth_isCC False against those with truth_isCC True.
f = uproot.open(data_files_location + "/MCC9.10_Run4b_v10_04_07_09_BNB_nu_overlay_surprise_reco2_hist.root")

# Passed as entry_stop to every .arrays() call below; None reads all entries.
num_events = None

wc_eval_df = f["wcpselection"]["T_eval"].arrays(["run", "subrun", "event", "truth_isCC", "truth_nuPdg"], library="pd", entry_stop=num_events)
wc_pfeval_df = f["wcpselection"]["T_PFeval"].arrays(["truth_NCDelta"], library="pd", entry_stop=num_events)

# Per-event particle lists: PDG codes and the matching truth_mother values.
wc_truth_df = f["wcpselection"]["T_PFeval"].arrays(["truth_pdg", "truth_mother"], library="pd", entry_stop=num_events)
wc_truth_pdgs = wc_truth_df["truth_pdg"].to_numpy()
wc_truth_mothers = wc_truth_df["truth_mother"].to_numpy()
prim_pdgs = []  # per event: de-duplicated PDG codes of particles with truth_mother == 0
all_pdgs = []   # per event: every PDG code, in stored order
for i in tqdm(range(len(wc_truth_pdgs))):
    curr_prim_pdgs = []
    curr_all_pdgs = []
    for j in range(len(wc_truth_pdgs[i])):
        curr_all_pdgs.append(wc_truth_pdgs[i][j])
        # truth_mother == 0 presumably marks a primary particle -- TODO confirm.
        if wc_truth_mothers[i][j] == 0:
            # NOTE(review): the |pdg| < 10000 cut is commented out, so no PDG codes are
            # filtered here even though the column below is named "nonnuc".
            if wc_truth_pdgs[i][j] not in curr_prim_pdgs:# and abs(wc_truth_pdgs[i][j]) < 10000:
                curr_prim_pdgs.append(wc_truth_pdgs[i][j])
    prim_pdgs.append(curr_prim_pdgs)
    all_pdgs.append(curr_all_pdgs)
wc_pfeval_df["all_geant_pdgs"] = all_pdgs
wc_pfeval_df["geant_unique_nonnuc_prim_pdgs"] = prim_pdgs

# Column-wise concat; rows are matched on the index of the two frames.
wc_df = pd.concat([wc_eval_df, wc_pfeval_df], axis=1)
# Prefix every column with "wc_" so it cannot collide with the glee columns.
wc_df.columns = ["wc_" + col for col in wc_df.columns]

glee_df = f["singlephotonana"]["vertex_tree"].arrays(["mctruth_is_delta_radiative", "mctruth_delta_radiative_1g1p_or_1g1n", "mctruth_cc_or_nc", "mctruth_delta_photon_energy"], library="pd", entry_stop=num_events)
glee_df.columns = ["glee_" + col for col in glee_df.columns]

# NOTE(review): assumes both trees list the same events in the same order -- confirm.
df = pd.concat([wc_df, glee_df], axis=1)
print("original num events: ", len(df))
normal_ncdelta_df = df.query("wc_truth_NCDelta == 1 and wc_truth_isCC == False")
# Events flagged as NC Delta that are also flagged as CC.
weird_df = df.query("wc_truth_NCDelta == 1 and wc_truth_isCC == True")
print("num events with wc_truth_NCDelta == 1 and wc_truth_isCC == True: ", len(weird_df))
print("num events with wc_truth_NCDelta == 1 and wc_truth_isCC == False: ", len(normal_ncdelta_df))
display(df)
display(weird_df)


In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Rebind `f` to a single NC pi0 overlay snapshot file.
# NOTE(review): absolute, user-specific path -- this only works on the machine
# that has /Users/leehagaman/...; consider building it from data_files_location.
f = uproot.open("/Users/leehagaman/uboone_python/uboone_ngem/data_files/SURPRISE_Test_Samples_v10_04_07_05_Run4b_hyper_unified_reco2_BNB_nu_NC_pi0_overlay_may8_reco2_hist_62280465_snapshot.root")


In [None]:
# Print name and first-entry value of each lantern/EventTree branch whose name
# contains "vtx" or "vertex" but not "true".
print("lantern vertex variables:")
lantern_tree = f["lantern"]["EventTree"]
for branch_name, _ in lantern_tree.items():
    if "true" in branch_name:
        continue
    if "vtx" in branch_name or "vertex" in branch_name:
        print(branch_name, lantern_tree[branch_name].array()[0])


In [None]:
# Print name and first-entry value of each lantern/EventTree branch whose name
# contains "shower" (case-insensitive) but not "true".
print("\nlantern shower variables:")
lantern_tree = f["lantern"]["EventTree"]
for branch_name, _ in lantern_tree.items():
    if "true" in branch_name:
        continue
    if "shower" in branch_name.lower():
        print(branch_name, lantern_tree[branch_name].array()[0])



In [None]:
# Display the nTracks branch of lantern/EventTree for all entries.
f["lantern"]["EventTree"]["nTracks"].array()

In [None]:
# Display the nShowers branch of lantern/EventTree for all entries.
f["lantern"]["EventTree"]["nShowers"].array()

In [None]:
# Print name and first-entry value of each lantern/EventTree branch whose name
# contains "track" (case-insensitive) but not "true".
print("\nlantern track variables:")
lantern_tree = f["lantern"]["EventTree"]
for branch_name, _ in lantern_tree.items():
    if "true" in branch_name:
        continue
    if "track" in branch_name.lower():
        print(branch_name, lantern_tree[branch_name].array()[0])



In [None]:
# Print name and first-entry value of the remaining lantern/EventTree branches:
# everything that is not a truth, vertex, weight, event-id, track or shower branch.
print("\nlantern other variables:")
lantern_tree = f["lantern"]["EventTree"]
event_id_branches = ["run", "subrun", "event", "fileid"]
for branch_name, _ in lantern_tree.items():
    lowered = branch_name.lower()
    if "true" in lowered:
        continue
    if "vtx" in branch_name or "weight" in lowered or branch_name in event_id_branches:
        continue
    if "track" in lowered or "shower" in lowered:
        continue
    print(branch_name, lantern_tree[branch_name].array()[0])

In [None]:
# Display the kpMaxScore branch of lantern/EventTree for all entries.
f["lantern"]["EventTree"]["kpMaxScore"].array()

In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Display what .items() yields for the reco_truthMatch_pdg branch of T_PFeval
# (presumably its sub-branches, if it has any -- confirm).
f["wcpselection"]["T_PFeval"]["reco_truthMatch_pdg"].items()

In [None]:
# Display the (name, branch) pairs of wcpselection/T_eval in the open file `f`.
f["wcpselection"]["T_eval"].items()

In [None]:
# Display the (name, branch) pairs of wcpselection/T_PFeval in the open file `f`.
f["wcpselection"]["T_PFeval"].items()

In [None]:
# Always raises ZeroDivisionError (1/0 is evaluated before print is called).
# NOTE(review): presumably a barrier that stops "Run All" from reaching the
# cells below -- confirm before removing.
print(1/0)

In [None]:
# Display the (name, branch) pairs of nuselection/NeutrinoSelectionFilter in the open file `f`.
f["nuselection"]["NeutrinoSelectionFilter"].items()

In [None]:
# Emit each vertex_tree branch name containing "sss" as a quoted, comma-terminated
# line, ready to paste into a Python list.
for branch_name, _ in f["singlephotonana"]["vertex_tree"].items():
    if "sss" not in branch_name:
        continue
    print(f'"{branch_name}",')

In [None]:
# Display the sss3d_shower_score branch of singlephotonana/vertex_tree for all entries.
f["singlephotonana"]["vertex_tree"]["sss3d_shower_score"].array()

In [None]:
# List every lantern/EventTree branch name that does not contain "true".
for var, _ in f["lantern"]["EventTree"].items():
    if "true" in var:
        continue
    print(var)

# Looking at dataframes

In [None]:
# Load the combined dataframe written by an earlier processing step.
# NOTE(review): pickle.load can execute arbitrary code -- only load files this
# project produced itself.
# NOTE(review): absolute, machine-specific path; consider a configurable location.
# The handle is named `pkl_file` rather than `f` so that the uproot file bound to
# `f` by the cells above is not overwritten with a closed file object.
with open("/nevis/riverside/data/leehagaman/ngem/intermediate_files/all_df.pkl", "rb") as pkl_file:
    all_df = pickle.load(pkl_file)

all_df

In [None]:
# For each pandora training variable: its name, then the first five values in
# all_df, then a blank line.
for var in pandora_training_vars:
    print(var)
    print(all_df[var][:5], end="\n\n")


In [None]:
# For each glee training variable: its name, then the first five values in
# all_df, then a blank line.
for var in glee_training_vars:
    print(var)
    print(all_df[var][:5], end="\n\n")


In [None]:
# Unit-width bins centred on the integers 0..10.
bins = np.linspace(-0.5, 10.5, 12)

# Thresholds 5, 10, ..., 50 that appear in the column names and legend labels.
proton_thresholds = range(5, 55, 5)

# One figure per column family (first wc_*, then lantern_*); each figure
# overlays one step histogram per threshold on a log y-axis.
column_templates = (
    "wc_reco_num_protons_{}_MeV",
    "lantern_prim_track_proton_num_{}MeV",
)

for template in column_templates:
    plt.figure()
    for threshold in proton_thresholds:
        plt.hist(all_df[template.format(threshold)], bins=bins, label=f"{threshold} MeV", histtype="step")
    plt.legend()
    plt.yscale("log")
    plt.show()


In [None]:
# Largest value found in the lantern_prim_track_proton_num_5MeV column.
np.max(all_df["lantern_prim_track_proton_num_5MeV"])

In [None]:
# Restrict all_df to the columns whose name contains "lantern".
lantern_cols = list(filter(lambda name: "lantern" in name, all_df.columns))
lantern_df = all_df[lantern_cols]

# Print the value at index label 0 for every lantern column.
for col, column_values in lantern_df.items():
    print(f"{col}: {column_values[0]}")

lantern_df

In [None]:
# Display the column index of all_df.
all_df.columns

In [None]:
# Display glee_max_ssv_score with every NaN replaced by the sentinel -999.
np.nan_to_num(all_df["glee_max_ssv_score"], nan=-999)

In [None]:
# 100-bin histogram of glee_max_ssv_score with NaN replaced by -999.
# NOTE(review): any NaN rows end up at -999, which stretches the x-range far
# beyond the other values.
plt.hist(np.nan_to_num(all_df["glee_max_ssv_score"], nan=-999), bins=100)

In [None]:
# Display only the all_df columns whose name contains "glee".
all_df[[col for col in all_df.columns if "glee" in col]]

In [None]:
# Print the value at index label 0 for every all_df column whose name contains "glee".
for col in all_df.columns:
    if "glee" not in col:
        continue
    first_value = all_df[col][0]
    print(f"{col}: {first_value}")

In [None]:
# Display only the all_df columns whose name contains "blip".
all_df[[col for col in all_df.columns if "blip" in col]]

In [None]:
# Display the glee_sss_candidate_veto_score column of all_df.
all_df["glee_sss_candidate_veto_score"]

In [None]:
# Column name -> legend label for each *_dist column that is overlaid below.
# (A bare `all_df[[...]]` expression used to open this cell; only the last
# expression of a cell is displayed, so it had no visible effect and was
# removed. Use display(all_df[list(dist_labels)]) if the table is wanted.)
dist_labels = {
    'wc_pandora_dist': 'WC Pandora',
    'wc_pandora_sce_dist': 'WC Pandora SCE',
    'wc_lantern_dist': 'WC Lantern',
    'lantern_pandora_dist': 'Lantern Pandora',
    'lantern_pandora_sce_dist': 'Lantern Pandora SCE',
}

plt.figure(figsize=(10, 5))
# 100 bin edges spanning 0 to 10; np.histogram ignores values outside that range.
bins = np.linspace(0, 10, 100)
for dist_col, dist_label in dist_labels.items():
    plt.hist(all_df[dist_col], histtype='step', bins=bins, label=dist_label)
plt.legend()
plt.show()

In [None]:
# Display the wc_reco_nuvtxX column of all_df.
all_df["wc_reco_nuvtxX"]

In [None]:
# Display the pelee_reco_nu_vtx_x column of all_df.
all_df["pelee_reco_nu_vtx_x"]

In [None]:
# Display the lantern_vtxX column of all_df.
all_df["lantern_vtxX"]