In [None]:
import numpy as np
import matplotlib.pyplot as plt
import uproot
import awkward as ak
import pandas as pd


# Loading POTs

In [None]:
# Directory holding the input ROOT files; reused by later cells.
loc = "BNBNuMI_TOsc_input_numifluxpatched/hist_rootfiles/"

# Maps "<beam>_<runhorn>_<filetype>" -> first entry of the "pot" branch of tree "T".
pot_dic = {}

beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

filetypes = [
    "data",
    "ext",
    "dirt",
    "nu_overlay",
    "intrinsic_nue",
]

for filetype in filetypes:
    for beam in beams:
        for run in runs:
            for horn in horns:

                # BNB files carry no horn tag, so only one pass per run is needed for BNB.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                # NOTE(review): run3_FHC is skipped, presumably because no such file exists - confirm.
                if runhorn == "run3_FHC":
                    continue

                # File naming differs per filetype and, for the simulated samples, per beam.
                if filetype == "data":
                    filename = f"{runhorn}_data_{beam}.root"
                elif filetype == "ext":
                    filename = f"checkout_data_ext{beam}_{runhorn}.root"
                elif filetype == "dirt" and beam == "BNB":
                    filename = f"checkout_prodgenie_dirt_overlay_{runhorn}.root"
                elif filetype == "dirt" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_dirt.root"
                elif filetype == "nu_overlay" and beam == "BNB":
                    filename = f"checkout_prodgenie_bnb_nu_overlay_{runhorn.lower()}.root"
                elif filetype == "nu_overlay" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_nu_overlay.root"
                elif filetype == "intrinsic_nue" and beam == "BNB":
                    filename = f"checkout_prodgenie_bnb_intrinsic_nue_overlay_{runhorn.lower()}.root"
                elif filetype == "intrinsic_nue" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_intrinsic_nue_overlay.root"
                else:
                    raise ValueError(f"Invalid filetype: {filetype} for beam: {beam} and runhorn: {runhorn}")

                # Use a context manager so each file handle is released as soon as the
                # POT value has been read, instead of leaving every file open.
                with uproot.open(loc + filename) as f:
                    pot_dic[f"{beam}_{runhorn}_{filetype}"] = f["T"]["pot"].array()[0]


for k, v in pot_dic.items():
    print(k, v)


# Loading roofile_obj signal trees into a dataframe (no osc samples)

In [None]:
beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

# (selection name, suffix) pairs; both parts are used to build the tree name.
sels = [
    ("nueCC", "_FC"),
    ("nueCC", "_PC"),
    ("numuCC", "_FC"),
    ("numuCC", "_PC"),
    ("CCpi0", "_FC"),
    ("CCpi0", "_PC"),
    ("NCpi0", ""),
]

# (tag used in the file name, tag used in the tree name) pairs.
filetypes = [
    ("intrinsic", "intnue"),
    ("nu_overlay", "overlaynumu"),
]

# One dataframe per (beam, filetype, runhorn, selection) tree that exists.
dfs = []

for beam in beams:
    for filetype_pair in filetypes:
        for run in runs:
            for horn in horns:

                # BNB files carry no horn tag, so only one pass per run is needed for BNB.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn

                if runhorn == "run3_FHC":
                    continue

                # Ratio of data POT to simulated-sample POT, taken from pot_dic.
                # Looked up outside any try/except so that a missing POT entry
                # raises instead of silently dropping the sample.
                if filetype_pair[0] == "intrinsic":
                    mc_pot_key = f"{beam}_{runhorn}_intrinsic_nue"
                else:
                    mc_pot_key = f"{beam}_{runhorn}_nu_overlay"
                pot_weight = pot_dic[f"{beam}_{runhorn}_data"] / pot_dic[mc_pot_key]

                filename = f"roofile_obj_{beam}_{runhorn}_{filetype_pair[0]}.root"

                # Context manager releases the file handle once its trees are read.
                with uproot.open(loc + filename) as f:
                    for sel in sels:
                        key = f"tree_{sel[0]}_from_{filetype_pair[1]}{sel[1]}"

                        # Only the tree lookup is best-effort: a file that lacks this
                        # selection's tree is skipped. Any other KeyError propagates.
                        try:
                            tree = f[key]
                        except KeyError:
                            continue

                        curr_df = pd.DataFrame()
                        for subkey in tree.keys():
                            curr_df[subkey] = tree[subkey].array(library="pd")

                        # Tag every row with the sample it came from.
                        curr_df["beam"] = beam
                        curr_df["filetype"] = filetype_pair[0]
                        curr_df["runhorn"] = runhorn
                        curr_df["sel"] = sel[0] + sel[1]
                        curr_df["pot_weight"] = pot_weight
                        dfs.append(curr_df)

all_sig_df = pd.concat(dfs, ignore_index=True, axis=0)

# Per-row weight: POT scaling times the weight stored in the tree.
all_sig_df["net_weight"] = all_sig_df["pot_weight"] * all_sig_df["e2e_weight_xs"]

# Keep only the columns used downstream.
all_sig_df = all_sig_df[["beam", "runhorn", "filetype", "sel", "net_weight", "e2e_pdg", "e2e_Etrue", "e2e_Ereco", "e2e_weight_xs", "e2e_baseline"]]

all_sig_df



In [None]:
# Show only the rows whose e2e_pdg equals 12 (rows with -12 are not matched by this filter).
all_sig_df[all_sig_df["e2e_pdg"] == 12]

# Loading data and bkg histograms from hist_rootfiles into arrays

In [None]:
# Maps "<beam>_<runhorn>_<filetype>" -> 1D array of the concatenated channel histograms.
data_bkg_hists = {}

beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

filetypes = [
    "data",
    "ext",
    "dirt",
]

# Histogram-name prefixes, one per channel, in the order they are concatenated.
# Defined once up front so it does not depend on "data" being the first filetype
# visited by the loop below.
start_list = [
    "nueCC_FC",
    "nueCC_PC",
    "numuCC_nopi0_nonueCC_FC",
    "numuCC_nopi0_nonueCC_PC",
    "CCpi0_nonueCC_FC",
    "CCpi0_nonueCC_PC",
    "NCpi0_nonueCC",
]

for filetype in filetypes:
    for beam in beams:
        for run in runs:
            for horn in horns:

                # BNB files carry no horn tag, so only one pass per run is needed for BNB.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                if runhorn == "run3_FHC":
                    continue

                if filetype == "data":
                    filename = f"{runhorn}_data_{beam}.root"
                elif filetype == "ext":
                    filename = f"checkout_data_ext{beam}_{runhorn}.root"
                elif filetype == "dirt":
                    if beam == "BNB":
                        filename = f"checkout_prodgenie_dirt_overlay_{runhorn}.root"
                    else:
                        filename = f"checkout_prodgenie_{runhorn.lower()}_dirt.root"
                else:
                    # Without this branch an unknown filetype would silently reuse
                    # the filename left over from the previous iteration.
                    raise ValueError(f"Invalid filetype: {filetype}")

                # Context manager releases the file handle once its histograms are read.
                with uproot.open(loc + filename) as f:
                    keys = f.keys()

                    # For each channel keep the first key that starts with its prefix;
                    # non-data files use the same prefixes with "BG_" prepended.
                    filtered_keys = []
                    for start in start_list:
                        prefix = start if filetype == "data" else "BG_" + start
                        first_match = next((key for key in keys if key.startswith(prefix)), None)
                        if first_match is not None:
                            filtered_keys.append(first_match)

                    arr_count = len(filtered_keys)
                    assert arr_count == len(start_list), f"Expected to find {len(start_list)} arrays, found {arr_count}!"

                    # values(flow=True) is sliced with [1:] to drop its first entry
                    # (the underflow bin, per the uproot histogram API).
                    # Cast to float64 so the result dtype does not depend on the file.
                    total_arr = np.concatenate(
                        [np.asarray(f[key].values(flow=True)[1:], dtype=np.float64) for key in filtered_keys]
                    )

                data_bkg_hists[f"{beam}_{runhorn}_{filetype}"] = total_arr


for k, v in data_bkg_hists.items():
    print(k, len(v))


## Combining files

In [None]:
# Layout of each combined vector: the first half is filled from BNB entries and the
# second half from NuMI entries; each half is 7 channels x 26 bins long.
n_bins_per_channel = 26
n_channels = 7
beam_block_len = n_bins_per_channel * n_channels

# Maps filetype -> per-bin sum over all runs/horns, BNB block followed by NuMI block.
data_bkg_hists_combined = {}

# The filetypes are listed explicitly rather than read from the global `filetypes`,
# which other cells rebind to different contents; this keeps the cell independent
# of which of those cells ran last.
for filetype in ("data", "ext", "dirt"):
    curr_arr = np.zeros(beam_block_len * 2)
    for beam in beams:
        for run in runs:
            for horn in horns:
                # BNB files carry no horn tag, so only one pass per run is needed for BNB.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                if runhorn == "run3_FHC":
                    continue

                sample_hist = data_bkg_hists[f"{beam}_{runhorn}_{filetype}"]
                if beam == "BNB":
                    curr_arr[:beam_block_len] += sample_hist
                else:
                    curr_arr[beam_block_len:] += sample_hist

    data_bkg_hists_combined[filetype] = curr_arr

print("")
for k, v in data_bkg_hists_combined.items():
    print(k, len(v))


# Nominal Prediction Histogram

## No osc weighting

In [None]:
# 26 evenly spaced edges from 0 to 2500 plus a final edge at 1e9, i.e. 26 bins
# where the last one collects everything from 2500 up to 1e9.
energy_bins = np.concatenate([np.linspace(0, 2500, 26), [1e9]])
energy_bin_centers = (energy_bins[:-1] + energy_bins[1:]) / 2

# Channel labels as stored in the "sel" column, in the order used for the combined vector.
sels = [
    "nueCC_FC",
    "nueCC_PC",
    "numuCC_FC",
    "numuCC_PC",
    "CCpi0_FC",
    "CCpi0_PC",
    "NCpi0",
]

# |e2e_pdg| value selected for each signal type.
abs_pdg_by_sigtype = {"nue": 12, "numu": 14}

# One zero-initialised vector per signal type: 2 beams x 7 channels x 26 bins.
sig_hists = {sigtype: np.zeros(26*7*2) for sigtype in abs_pdg_by_sigtype}

for sigtype, abs_pdg in abs_pdg_by_sigtype.items():
    flavor_df = all_sig_df[all_sig_df["e2e_pdg"].abs() == abs_pdg]

    for sel_i, sel in enumerate(sels):
        flavor_sel_df = flavor_df[flavor_df["sel"] == sel]

        for beam_i, beam in enumerate(["BNB", "NuMI"]):
            channel_df = flavor_sel_df[flavor_sel_df["beam"] == beam]

            # Weighted reconstructed-energy spectrum for this (signal type, channel, beam).
            counts, _ = np.histogram(
                channel_df["e2e_Ereco"],
                bins=energy_bins,
                weights=channel_df["net_weight"],
            )

            # Beam selects the half of the vector, channel selects the 26-bin slot within it.
            first = beam_i*26*7 + sel_i*26
            sig_hists[sigtype][first:first+26] = counts

all_nue_sig_hist = sig_hists["nue"]
all_numu_sig_hist = sig_hists["numu"]


In [None]:
# One unit-width bin per entry of the combined 26*7*2-long vectors.
bins = np.linspace(0, 26*7*2, 26*7*2+1)
bin_centers = (bins[:-1] + bins[1:]) / 2

# (per-bin contents, line colour, legend label) for every curve, in drawing order.
overview_series = [
    (data_bkg_hists_combined["data"], "k", "Data"),
    (data_bkg_hists_combined["ext"], "grey", "Ext"),
    (data_bkg_hists_combined["dirt"], "brown", "Dirt"),
    (all_nue_sig_hist, "g", "Nue"),
    (all_numu_sig_hist, "b", "Numu"),
]

plt.figure(figsize=(10, 6))
for contents, colour, legend_label in overview_series:
    # Each bin centre is filled once, weighted by that bin's content, to draw the outline.
    plt.hist(bin_centers, bins=bins, weights=contents, histtype="step", color=colour, label=legend_label)

plt.legend()
plt.yscale("log")
plt.show()


In [None]:
def plot_channel_panel(position, block, title, show_legend=False):
    """Draw one stacked prediction-vs-data panel on the current 2x4 figure.

    Parameters
    ----------
    position : int
        Subplot index (1-8) in the 2x4 grid.
    block : int
        Index of the 26-entry slice of the combined vectors to draw,
        i.e. entries ``26*block`` up to ``26*(block+1)``.
    title : str
        Panel title.
    show_legend : bool, optional
        Whether to draw the legend on this panel.
    """
    lo, hi = 26 * block, 26 * (block + 1)

    plt.subplot(2, 4, position)

    # NOTE(review): energy_bins[:26] holds only the 26 edges from 0 to 2500, so the
    # last entry of each 26-long slice (whose bin centre lies far above 2500) is not
    # drawn in the stack, and the matching data point falls outside the x-limits set
    # below. Confirm that leaving this last bin out of the plot is intended.
    plt.hist(
        [energy_bin_centers for _ in range(4)],
        weights=[
            data_bkg_hists_combined["ext"][lo:hi],
            data_bkg_hists_combined["dirt"][lo:hi],
            all_nue_sig_hist[lo:hi],
            all_numu_sig_hist[lo:hi],
        ],
        bins=energy_bins[:26],
        stacked=True,
        color=["grey", "brown", "g", "b"],
        label=["Ext", "Dirt", "Nue", "Numu"],
    )

    # Data points with sqrt(N) error bars.
    observed = data_bkg_hists_combined["data"][lo:hi]
    plt.errorbar(energy_bin_centers, observed, yerr=np.sqrt(observed), fmt=".", color="k", label="Data")

    plt.xlim(0, 2600)
    if show_legend:
        plt.legend()
    plt.title(title)


# (subplot position, 26-entry block index, title) for each panel.
# Blocks 0-3 come from the first half of the combined vectors, blocks 7-10 from the second.
channel_panels = [
    (1, 0, "BNB nueCC FC, no Osc"),
    (2, 1, "BNB nueCC PC, no osc"),
    (3, 2, "BNB numuCC FC, no osc"),
    (4, 3, "BNB numuCC PC, no osc"),
    (5, 7, "NuMI nueCC FC, no osc"),
    (6, 8, "NuMI nueCC PC, no osc"),
    (7, 9, "NuMI numuCC FC, no osc"),
    (8, 10, "NuMI numuCC PC, no osc"),
]

plt.figure(figsize=(15, 8))

for position, block, title in channel_panels:
    # Only the fourth panel carries the legend.
    plot_channel_panel(position, block, title, show_legend=(position == 4))

plt.show()
