In [None]:
import numpy as np
import matplotlib.pyplot as plt
import uproot
import awkward as ak
import pandas as pd


# Loading the `roofile_obj_*.root` selection trees into a dataframe (no osc samples)

In [None]:
# Directory (relative to the notebook) holding every ROOT file read below.
loc = "BNBNuMI_TOsc_input_numifluxpatched/hist_rootfiles/"

beams = [
    "BNB", 
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

# (selection name, containment suffix) pairs; joined they form the "sel" label.
sels = [
    ("nueCC", "_FC"),
    ("nueCC", "_PC"),
    ("numuCC", "_FC"),
    ("numuCC", "_PC"),
    ("CCpi0", "_FC"),
    ("CCpi0", "_PC"),
    ("NCpi0", ""),
]

# (tag used in the file name, tag used in the tree name) pairs.
filetypes = [
    ("intrinsic", "intnue"), 
    ("nu_overlay", "overlaynumu"),
]

# One dataframe per (file, selection tree); concatenated after the loops.
dfs = []

for beam in beams:
    for filetype_pair in filetypes:
        for run in runs:
            for horn in horns:

                # BNB file names carry only the run; NuMI names carry run + horn.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn

                # NOTE(review): run3_FHC is skipped unconditionally — presumably
                # no such files exist; confirm.
                if runhorn == "run3_FHC":
                    continue

                filename = f"roofile_obj_{beam}_{runhorn}_{filetype_pair[0]}.root"

                # Context manager so the file handle is released after reading.
                with uproot.open(loc + filename) as f:
                    for sel in sels:
                        key = f"tree_{sel[0]}_from_{filetype_pair[1]}{sel[1]}"

                        # Best-effort: a tree missing from this file is skipped.
                        # Only the lookup is guarded, so a KeyError raised while
                        # reading branches is no longer silently swallowed.
                        try:
                            tree = f[key]
                        except KeyError:
                            continue

                        # Build the frame in one go instead of inserting
                        # columns one at a time.
                        curr_df = pd.DataFrame(
                            {subkey: tree[subkey].array(library="pd") for subkey in tree.keys()}
                        )

                        # Provenance columns used by later queries.
                        curr_df["beam"] = beam
                        curr_df["filetype"] = filetype_pair[0]
                        curr_df["runhorn"] = runhorn
                        curr_df["sel"] = sel[0] + sel[1]

                        dfs.append(curr_df)

all_sig_df = pd.concat(dfs, ignore_index=True, axis=0)

# Keep only the provenance columns and the branches used downstream.
all_sig_df = all_sig_df[["beam", "runhorn", "filetype", "sel", "e2e_pdg", "e2e_Etrue", "e2e_Ereco", "e2e_weight_xs", "e2e_baseline"]]

all_sig_df



# Loading data and background (ext, dirt) histograms from hist_rootfiles into arrays

In [None]:
# Maps "{beam}_{runhorn}_{filetype}" to the concatenated histogram contents
# of the seven selection channels read from that file.
data_bkg_hists = {}

beams = [
    "BNB", 
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

filetypes = [
    "data",
    "ext",
    "dirt",
]

horns = [
    "FHC",
    "RHC",
]

# Histogram-name prefixes, one per channel, in concatenation order.
# Defined once up front: previously this was only assigned inside the
# `filetype == "data"` branch, so "ext"/"dirt" worked only because "data"
# happened to be iterated first.
start_list = [
    "nueCC_FC",
    "nueCC_PC",
    "numuCC_nopi0_nonueCC_FC",
    "numuCC_nopi0_nonueCC_PC",
    "CCpi0_nonueCC_FC",
    "CCpi0_nonueCC_PC",
    "NCpi0_nonueCC",
]

for filetype in filetypes:
    for beam in beams:
        for run in runs:
            for horn in horns:

                # BNB file names carry only the run; NuMI names carry run + horn.
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                # NOTE(review): run3_FHC is skipped unconditionally — presumably
                # no such files exist; confirm.
                if runhorn == "run3_FHC":
                    continue

                if filetype == "data":
                    filename = f"{runhorn}_data_{beam}.root"
                elif filetype == "ext":
                    filename = f"checkout_data_ext{beam}_{runhorn}.root"
                elif filetype == "dirt":
                    if beam == "BNB":
                        filename = f"checkout_prodgenie_dirt_overlay_{runhorn}.root"
                    else:
                        filename = f"checkout_prodgenie_{runhorn.lower()}_dirt.root"
                else:
                    # Without this, an unknown type would reuse a stale filename.
                    raise ValueError(f"Unknown filetype: {filetype!r}")

                # Context manager so the file handle is released after reading.
                with uproot.open(loc + filename) as f:
                    keys = f.keys()

                    # For each channel keep the first key with the expected
                    # prefix; non-data files use a "BG_" prefix.
                    filtered_keys = []
                    for start in start_list:
                        prefix = start if filetype == "data" else "BG_" + start
                        first_match = next((key for key in keys if key.startswith(prefix)), None)
                        if first_match is not None:
                            filtered_keys.append(first_match)

                    arr_count = len(filtered_keys)
                    assert arr_count == len(start_list), f"Expected to find {len(start_list)} arrays, found {arr_count}!"

                    # values(flow=True) includes the flow bins; [1:] drops the
                    # first entry (underflow) and keeps the overflow bin.
                    # Cast to float64 to match the dtype the original
                    # accumulate-onto-np.array([]) loop produced.
                    total_arr = np.concatenate(
                        [np.asarray(f[key].values(flow=True)[1:]) for key in filtered_keys]
                    ).astype(np.float64)

                data_bkg_hists[f"{beam}_{runhorn}_{filetype}"] = total_arr


# Sanity check: print the length of every loaded array.
for k, v in data_bkg_hists.items():
    print(k, len(v))


## Combining runs into one histogram per file type

In [None]:
# Sum the per-run arrays into one array per file type. The combined array has
# 2 * 7 * 26 entries: the first half accumulates BNB, the second half NuMI.
n_beam_bins = 26*7

data_bkg_hists_combined = {}
for filetype in filetypes:
    summed = np.zeros(n_beam_bins*2)

    for beam in beams:
        is_bnb = beam == "BNB"
        # Target half of the combined array for this beam.
        half = slice(None, n_beam_bins) if is_bnb else slice(n_beam_bins, None)

        for run in runs:
            for horn in horns:
                if is_bnb and horn == "RHC":
                    continue # no BNB RHC files

                # BNB keys carry only the run; NuMI keys carry run + horn.
                runhorn = run if is_bnb else run + "_" + horn
                if runhorn == "run3_FHC":
                    continue

                summed[half] += data_bkg_hists[f"{beam}_{runhorn}_{filetype}"]

    data_bkg_hists_combined[filetype] = summed

# Blank separator line, then the length of each combined array.
print("")
for name, hist in data_bkg_hists_combined.items():
    print(name, len(hist))


# Nominal Prediction Histogram

## No osc weighting

In [None]:
# 27 edges -> 26 bins: 25 equal-width bins spanning 0..2500 plus one
# catch-all bin extending to 1e9.
energy_bins = np.concatenate([np.linspace(0, 2500, 26), [1e9]])

sels = [
    "nueCC_FC",
    "nueCC_PC",
    "numuCC_FC",
    "numuCC_PC",
    "CCpi0_FC",
    "CCpi0_PC",
    "NCpi0",
]

# Flat layout: [BNB: 7 selections x 26 bins | NuMI: 7 selections x 26 bins].
all_nue_sig_hist = np.zeros(26*7*2)
all_numu_sig_hist = np.zeros(26*7*2)

# |PDG code| -> output array filled in place for that flavour.
hist_by_abs_pdg = {12: all_nue_sig_hist, 14: all_numu_sig_hist}

for abs_pdg, target_hist in hist_by_abs_pdg.items():
    is_flavour = all_sig_df["e2e_pdg"].abs() == abs_pdg

    for sel_i, sel in enumerate(sels):
        is_sel = all_sig_df["sel"] == sel

        for beam_i, beam in enumerate(["BNB", "NuMI"]):
            is_beam = all_sig_df["beam"] == beam
            chosen = all_sig_df[is_flavour & is_sel & is_beam]

            # Weighted reconstructed-energy spectrum for this slot.
            counts, _ = np.histogram(
                chosen["e2e_Ereco"],
                bins=energy_bins,
                weights=chosen["e2e_weight_xs"],
            )

            start_index = beam_i*26*7 + sel_i*26
            target_hist[start_index:start_index+26] = counts


In [None]:
# Unit-width bins, one per entry of the 2*7*26-long flat histograms; each
# precomputed array is drawn by passing it as weights at the bin centers.
bins = np.linspace(0, 26*7*2, 26*7*2+1)
bin_centers = (bins[:-1] + bins[1:]) / 2

plt.figure(figsize=(10, 6))

for weights, color, label in [
    (data_bkg_hists_combined["data"], "k", "Data"),
    (data_bkg_hists_combined["ext"], "grey", "Ext"),
    (data_bkg_hists_combined["dirt"], "brown", "Dirt"),
    (all_nue_sig_hist, "g", "Nue"),
    (all_numu_sig_hist, "b", "Numu"),
]:
    plt.hist(bin_centers, bins=bins, weights=weights, histtype="step", color=color, label=label)

# BNB rows are stored with runhorn == run (no horn suffix), so the previous
# filter runhorn=='run1_FHC' could never match and this overlay was always empty.
bnb_run1_intrinsic_nue_fc = all_sig_df.query("beam=='BNB' and runhorn=='run1' and filetype=='intrinsic' and sel=='nueCC_FC'")
# NOTE(review): this histograms e2e_Etrue against the global-bin-index edges
# rather than energy_bins — looks like leftover debugging; confirm intent.
plt.hist(bnb_run1_intrinsic_nue_fc["e2e_Etrue"], bins=bins, weights=bnb_run1_intrinsic_nue_fc["e2e_weight_xs"], histtype="step", color="r", label="BNB Intrinsic nueCC FC")

plt.xlabel("Global bin index (BNB selections, then NuMI selections)")
plt.ylabel("Weighted entries per bin")
plt.legend()
plt.yscale("log")
plt.show()
