In [None]:
import uproot
print("uproot version: ", uproot.__version__)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.notebook import tqdm
import pickle

import sys
import os

# Add the parent of the current working directory to sys.path
# (presumably so the `src` package imported below resolves — confirm the
# notebook is always launched from a direct subdirectory of the project).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.file_locations import data_files_location


# Looking at input SURPRISE ROOT Files

In [None]:
# Open the beam-off SURPRISE file and print the name of every object it holds.
# `f` stays bound to this file for the inspection cells that follow.
beam_off_path = data_files_location + "/MCC9.10_Run4b_v10_04_07_09_Run4b_BNB_beam_off_surprise_reco2_hist.root"
f = uproot.open(beam_off_path)
for key, _obj in f.items():
    print(key)

In [None]:
# List everything stored under lantern/EventTree in the currently open file `f`.
f["lantern"]["EventTree"].items()

In [None]:
# Peek at entries 36-39 of the showerIsSecondary branch, read as a NumPy array.
f["lantern"]["EventTree"]["showerIsSecondary"].array(library="np")[36:40]

In [None]:
# Peek at entries 36-39 of the showerRecoE branch, read as a NumPy array.
f["lantern"]["EventTree"]["showerRecoE"].array(library="np")[36:40]

In [None]:
# Peek at entries 36-39 of the showerFromNeutralScore branch, read as a NumPy array.
f["lantern"]["EventTree"]["showerFromNeutralScore"].array(library="np")[36:40]

In [None]:
# NOTE(review): 1/0 always raises ZeroDivisionError, so this cell can never
# succeed. Presumably a deliberate barrier so "Run All" halts here — confirm,
# and consider an explicit `raise` with a message instead.
print(1/0)

In [None]:
# List everything stored under nuselection/NeutrinoSelectionFilter in the currently open file `f`.
f["nuselection"]["NeutrinoSelectionFilter"].items()

In [None]:
# NOTE(review): 1/0 always raises ZeroDivisionError, so this cell can never
# succeed. Presumably a deliberate barrier so "Run All" halts here — confirm,
# and consider an explicit `raise` with a message instead.
print(1/0)

In [None]:
# For several ntuple files, print the names of the stored objects that mention
# "lantern". Each pair is (heading printed first, file name appended to
# data_files_location).
labelled_files = [
    ("\nnewest del1g file:", "/UNUSED_newer_one_test_del1g_ntuple_file.root"),
    ("\nnew del1g file:", "/UNUSED_one_test_del1g_ntuple_file.root"),
    ("\nold del1g file:", "/UNUSED_delete_one_gamma_run45_1k.root"),
    ("\nnue overlay file:", "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_intrinsic_nue_overlay_surprise_reco2_hist_4a.root"),
]

for heading, relative_path in labelled_files:
    print(heading)
    # `f` is rebound on every pass, so afterwards it refers to the last file listed.
    f = uproot.open(data_files_location + relative_path)
    for key, _obj in f.items():
        if "lantern" in key:
            print(key)

In [None]:
# Open the intrinsic nue overlay file and print the name of every object it holds.
nue_overlay_path = data_files_location + "/checkout_MCC9.10_Run4a4c4d5_v10_04_07_13_BNB_intrinsic_nue_overlay_surprise_reco2_hist_4a.root"
f = uproot.open(nue_overlay_path)
for key, _obj in f.items():
    print(key)


In [None]:
# Open the del1g test ntuple and display its full list of stored items.
f = uproot.open(data_files_location + "/UNUSED_one_test_del1g_ntuple_file.root")
f.items()

In [None]:
# Print the evtTimeNS branch of wcpselection/T_PFeval for the del1g and then the
# iso1g test ntuple. `f` ends up bound to the iso1g file.
for ntuple_name in (
    "/UNUSED_one_test_del1g_ntuple_file.root",
    "/UNUSED_one_test_iso1g_ntuple_file.root",
):
    f = uproot.open(data_files_location + ntuple_name)
    pfeval_tree = f["wcpselection"]["T_PFeval"]
    print(pfeval_tree["evtTimeNS"].array(library="np"))

In [None]:
# Survey the data directory: for every ROOT file, report whether any branch name
# in the three wcpselection trees contains "WCPMTInfo", and how many entries
# T_eval has.
wcp_tree_names = ("T_PFeval", "T_eval", "T_BDTvars")

# Sorted so the report order is the same on every run.
for filename in sorted(os.listdir(data_files_location)):
    # Skip directory entries that are not ROOT files; uproot cannot open them.
    if not filename.endswith(".root"):
        continue

    # The context manager closes each file before the next one is opened, so
    # handles do not accumulate over the loop.
    with uproot.open(os.path.join(data_files_location, filename)) as root_file:
        wcp_dir = root_file["wcpselection"]
        contains_WCPMTInfo = any(
            "WCPMTInfo" in varname
            for tree_name in wcp_tree_names
            for varname in wcp_dir[tree_name].keys()
        )
        num_events = wcp_dir["T_eval"].num_entries

    # The f-string "=" specifier prints the variable names, so they are part of the output.
    print(f'{filename.ljust(100)}', f'{contains_WCPMTInfo=}', f'{num_events=}')


In [None]:
# Open the BNB nu overlay file and list everything stored under wcpselection/T_eval.
f = uproot.open(data_files_location + "/MCC9.10_Run4b_v10_04_07_09_BNB_nu_overlay_surprise_reco2_hist.root")

f["wcpselection"]["T_eval"].items()


In [None]:
# Cross-check the wcpselection truth flags against the singlephotonana (glee_)
# truth flags in the BNB nu overlay file, isolating events that are flagged
# truth_NCDelta == 1 while truth_isCC is True.
overlay_path = data_files_location + "/MCC9.10_Run4b_v10_04_07_09_BNB_nu_overlay_surprise_reco2_hist.root"

# Passed as entry_stop to every read below; None puts no limit on the entries read.
num_events = None

# All reads happen inside the context manager so the file is closed afterwards;
# the resulting dataframes hold their own data and remain usable.
with uproot.open(overlay_path) as f:
    wc_eval_df = f["wcpselection"]["T_eval"].arrays(["run", "subrun", "event", "truth_isCC", "truth_nuPdg"], library="pd", entry_stop=num_events)
    wc_pfeval_df = f["wcpselection"]["T_PFeval"].arrays(["truth_NCDelta"], library="pd", entry_stop=num_events)
    wc_truth_df = f["wcpselection"]["T_PFeval"].arrays(["truth_pdg", "truth_mother"], library="pd", entry_stop=num_events)
    glee_df = f["singlephotonana"]["vertex_tree"].arrays(["mctruth_is_delta_radiative", "mctruth_delta_radiative_1g1p_or_1g1n", "mctruth_cc_or_nc", "mctruth_delta_photon_energy"], library="pd", entry_stop=num_events)

wc_truth_pdgs = wc_truth_df["truth_pdg"].to_numpy()
wc_truth_mothers = wc_truth_df["truth_mother"].to_numpy()

# Per event, collect (a) every truth_pdg value in stored order and (b) the
# distinct truth_pdg values of particles whose truth_mother is 0, in first-seen
# order.
# NOTE(review): the `abs(pdg) < 10000` cut is disabled, so the column named
# "geant_unique_nonnuc_prim_pdgs" currently keeps all codes (the cut was
# presumably meant to drop nuclear PDG codes — confirm which is intended).
prim_pdgs = []
all_pdgs = []
for i in tqdm(range(len(wc_truth_pdgs))):
    event_pdgs = wc_truth_pdgs[i]
    event_mothers = wc_truth_mothers[i]
    curr_prim_pdgs = []
    curr_all_pdgs = []
    for j in range(len(event_pdgs)):
        curr_all_pdgs.append(event_pdgs[j])
        if event_mothers[j] == 0 and event_pdgs[j] not in curr_prim_pdgs:  # and abs(event_pdgs[j]) < 10000
            curr_prim_pdgs.append(event_pdgs[j])
    prim_pdgs.append(curr_prim_pdgs)
    all_pdgs.append(curr_all_pdgs)
wc_pfeval_df["all_geant_pdgs"] = all_pdgs
wc_pfeval_df["geant_unique_nonnuc_prim_pdgs"] = prim_pdgs

# Side-by-side concatenation aligns rows on the index.
# NOTE(review): assumes T_eval, T_PFeval and vertex_tree store the same events
# in the same order — confirm.
wc_df = pd.concat([wc_eval_df, wc_pfeval_df], axis=1)
# Prefix every column with its source so the two sets of truth flags cannot collide.
wc_df.columns = ["wc_" + col for col in wc_df.columns]
glee_df.columns = ["glee_" + col for col in glee_df.columns]

df = pd.concat([wc_df, glee_df], axis=1)
print("original num events: ", len(df))
normal_ncdelta_df = df.query("wc_truth_NCDelta == 1 and wc_truth_isCC == False")
weird_df = df.query("wc_truth_NCDelta == 1 and wc_truth_isCC == True")
print("num events with wc_truth_NCDelta == 1 and wc_truth_isCC == True: ", len(weird_df))
print("num events with wc_truth_NCDelta == 1 and wc_truth_isCC == False: ", len(normal_ncdelta_df))
display(df)
display(weird_df)


In [None]:
# NOTE(review): 1/0 always raises ZeroDivisionError, so this cell can never
# succeed. Presumably a deliberate barrier so "Run All" halts here — confirm,
# and consider an explicit `raise` with a message instead.
print(1/0)

In [None]:
# Open the NC pi0 overlay test sample; the inspection cells below read it through `f`.
# NOTE(review): this is an absolute, machine-specific path, whereas other cells
# build paths from data_files_location — confirm whether this file can be
# addressed the same way.
f = uproot.open("/Users/leehagaman/uboone_python/uboone_ngem/data_files/SURPRISE_Test_Samples_v10_04_07_05_Run4b_hyper_unified_reco2_BNB_nu_NC_pi0_overlay_may8_reco2_hist_62280465_snapshot.root")


In [None]:
# Print the first entry of every lantern/EventTree branch whose name mentions
# "vtx" or "vertex", skipping names that contain "true".
print("lantern vertex variables:")
event_tree = f["lantern"]["EventTree"]
for name, _branch in event_tree.items():
    if "true" in name:
        continue
    if "vtx" in name or "vertex" in name:
        print(name, event_tree[name].array()[0])


In [None]:
# Print the first entry of every lantern/EventTree branch whose name mentions
# "shower" (case-insensitive), skipping names that contain "true".
print("\nlantern shower variables:")
event_tree = f["lantern"]["EventTree"]
for name, _branch in event_tree.items():
    if "true" in name:
        continue
    if "shower" in name.lower():
        print(name, event_tree[name].array()[0])



In [None]:
# Read and display the whole nTracks branch of lantern/EventTree.
f["lantern"]["EventTree"]["nTracks"].array()

In [None]:
# Read and display the whole nShowers branch of lantern/EventTree.
f["lantern"]["EventTree"]["nShowers"].array()

In [None]:
# Print the first entry of every lantern/EventTree branch whose name mentions
# "track" (case-insensitive), skipping names that contain "true".
print("\nlantern track variables:")
event_tree = f["lantern"]["EventTree"]
for name, _branch in event_tree.items():
    if "true" in name:
        continue
    if "track" in name.lower():
        print(name, event_tree[name].array()[0])



In [None]:
# Print the first entry of every remaining lantern/EventTree branch: names that
# mention true/weight/track/shower (case-insensitive) or "vtx" (case-sensitive),
# and the run/subrun/event/fileid branches, are skipped.
print("\nlantern other variables:")
event_tree = f["lantern"]["EventTree"]
bookkeeping_names = ["run", "subrun", "event", "fileid"]
for name, _branch in event_tree.items():
    lowered = name.lower()
    excluded_by_keyword = any(word in lowered for word in ("true", "weight", "track", "shower"))
    if excluded_by_keyword or "vtx" in name or name in bookkeeping_names:
        continue
    print(name, event_tree[name].array()[0])

In [None]:
# Read and display the whole kpMaxScore branch of lantern/EventTree.
f["lantern"]["EventTree"]["kpMaxScore"].array()

In [None]:
# NOTE(review): 1/0 always raises ZeroDivisionError, so this cell can never
# succeed. Presumably a deliberate barrier so "Run All" halts here — confirm,
# and consider an explicit `raise` with a message instead.
print(1/0)

In [None]:
# List the items found under the reco_truthMatch_pdg entry of wcpselection/T_PFeval.
# NOTE(review): unlike the neighbouring cells, .items() is called one level
# below the tree here — confirm this is intended rather than .array().
f["wcpselection"]["T_PFeval"]["reco_truthMatch_pdg"].items()

In [None]:
# List everything stored under wcpselection/T_eval in the currently open file `f`.
f["wcpselection"]["T_eval"].items()

In [None]:
# List everything stored under wcpselection/T_PFeval in the currently open file `f`.
f["wcpselection"]["T_PFeval"].items()

In [None]:
# NOTE(review): 1/0 always raises ZeroDivisionError, so this cell can never
# succeed. Presumably a deliberate barrier so "Run All" halts here — confirm,
# and consider an explicit `raise` with a message instead.
print(1/0)

In [None]:
# List everything stored under nuselection/NeutrinoSelectionFilter in the currently open file `f`.
f["nuselection"]["NeutrinoSelectionFilter"].items()

In [None]:
# Print each singlephotonana/vertex_tree branch name containing "sss", wrapped
# in double quotes and followed by a comma (presumably for pasting into a list).
vertex_tree = f["singlephotonana"]["vertex_tree"]
sss_names = [name for name, _branch in vertex_tree.items() if "sss" in name]
for name in sss_names:
    print(f'"{name}",')

In [None]:
# Read and display the whole sss3d_shower_score branch of singlephotonana/vertex_tree.
f["singlephotonana"]["vertex_tree"]["sss3d_shower_score"].array()

In [None]:
# Print the name of every lantern/EventTree branch that does not contain "true".
for var, _branch in f["lantern"]["EventTree"].items():
    if "true" in var:
        continue
    print(var)

# Looking at dataframes

In [None]:
# Load the pickled all_df dataframe and preview its first rows.
# The handle has its own name so the notebook-level `f` (the ROOT file used by
# earlier cells) is not replaced by a closed pickle file handle.
# NOTE(review): absolute, machine-specific path. Also, pickle.load can execute
# arbitrary code from the file — only load pickles produced by this project.
with open("/Users/leehagaman/uboone_python/uboone_ngem/intermediate_files/all_df.pkl", "rb") as all_df_file:
    all_df = pickle.load(all_df_file)

all_df.head()

In [None]:
# Restrict all_df to the columns whose name contains "lantern".
lantern_cols = [col for col in all_df.columns if "lantern" in col]
lantern_df = all_df[lantern_cols]

# Print each lantern column's value in the first row. `.iloc[0]` selects by
# position; a plain `[0]` is a label lookup and fails when the index has no
# label 0.
for col in lantern_df.columns:
    print(f"{col}: {lantern_df[col].iloc[0]}")

lantern_df

In [None]:
# Show every column name in all_df.
all_df.columns

In [None]:
# Display glee_max_ssv_score with NaN entries replaced by the sentinel -999.
np.nan_to_num(all_df["glee_max_ssv_score"], nan=-999)

In [None]:
# Histogram (100 bins) of glee_max_ssv_score with NaN entries replaced by -999.
# The tuple returned by plt.hist is echoed as the cell output.
plt.hist(np.nan_to_num(all_df["glee_max_ssv_score"], nan=-999), bins=100)

In [None]:
# Display only the all_df columns whose name contains "glee".
all_df[[col for col in all_df.columns if "glee" in col]]

In [None]:
# Print each glee column's value in the first row. `.iloc[0]` selects by
# position; a plain `[0]` is a label lookup and fails when the index has no
# label 0.
for col in all_df.columns:
    if "glee" in col:
        print(f"{col}: {all_df[col].iloc[0]}")

In [None]:
# Display only the all_df columns whose name contains "blip".
all_df[[col for col in all_df.columns if "blip" in col]]

In [None]:
# Display the glee_sss_candidate_veto_score column.
all_df["glee_sss_candidate_veto_score"]

In [None]:
# Overlay step histograms of the five *_dist columns on one shared binning
# (100 edges from 0 to 10). Each column name maps to its legend label; dict
# order sets the drawing order.
dist_labels = {
    'wc_pandora_dist': 'WC Pandora',
    'wc_pandora_sce_dist': 'WC Pandora SCE',
    'wc_lantern_dist': 'WC Lantern',
    'lantern_pandora_dist': 'Lantern Pandora',
    'lantern_pandora_sce_dist': 'Lantern Pandora SCE',
}

plt.figure(figsize=(10, 5))
bins = np.linspace(0, 10, 100)
for dist_col, legend_label in dist_labels.items():
    plt.hist(all_df[dist_col], histtype='step', bins=bins, label=legend_label)
plt.legend()
plt.show()

In [None]:
# Display the wc_reco_nuvtxX column.
all_df["wc_reco_nuvtxX"]

In [None]:
# Display the pelee_reco_nu_vtx_x column.
all_df["pelee_reco_nu_vtx_x"]

In [None]:
# Display the lantern_vtxX column.
all_df["lantern_vtxX"]