In [None]:
import numpy as np
import matplotlib.pyplot as plt
import uproot
import awkward as ak
import pandas as pd


# Loading POTs

In [None]:
# Directory holding the input ROOT files; every file name built in this cell is appended to it.
loc = "BNBNuMI_TOsc_input_numifluxpatched/hist_rootfiles/"

# Maps "<beam>_<runhorn>_<filetype>" to the first entry of the "pot" branch of tree "T".
pot_dic = {}

beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]


filetypes = [
    "data",
    "ext",
    "dirt",
    "nu_overlay",
    "intrinsic_nue",
    "fullosc_overlay",
]

for filetype in filetypes:
    for beam in beams:
        for run in runs:
            for horn in horns:

                # BNB files carry no horn tag in their name; NuMI files are tagged "<run>_<horn>".
                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                if runhorn == "run3_FHC":
                    # NOTE(review): presumably there are no NuMI run3 FHC input files — confirm.
                    continue

                # The file-naming scheme differs per filetype and, for simulation, per beam.
                if filetype == "data":
                    filename = f"{runhorn}_data_{beam}.root"
                elif filetype == "ext":
                    filename = f"checkout_data_ext{beam}_{runhorn}.root"
                elif filetype == "dirt" and beam == "BNB":
                    filename = f"checkout_prodgenie_dirt_overlay_{runhorn}.root"
                elif filetype == "dirt" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_dirt.root"
                elif filetype == "nu_overlay" and beam == "BNB":
                    filename = f"checkout_prodgenie_bnb_nu_overlay_{runhorn.lower()}.root"
                elif filetype == "nu_overlay" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_nu_overlay.root"
                elif filetype == "intrinsic_nue" and beam == "BNB":
                    filename = f"checkout_prodgenie_bnb_intrinsic_nue_overlay_{runhorn.lower()}.root"
                elif filetype == "intrinsic_nue" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_intrinsic_nue_overlay.root"
                elif filetype == "fullosc_overlay" and beam == "BNB":
                    filename = f"checkout_prodgenie_bnb_numu2nue_overlay_{runhorn.lower()}.root"
                elif filetype == "fullosc_overlay" and beam == "NuMI":
                    filename = f"checkout_prodgenie_{runhorn.lower()}_fullosc_overlay.root"
                else:
                    raise ValueError(f"Invalid filetype: {filetype} for beam: {beam} and runhorn: {runhorn}")

                # The context manager closes the ROOT file as soon as the POT value has been read,
                # so the loop does not accumulate open file handles.
                with uproot.open(loc + filename) as f:
                    pot_dic[f"{beam}_{runhorn}_{filetype}"] = f["T"]["pot"].array()[0]

for k, v in pot_dic.items():
    print(k, v)


# Loading roofile_obj event trees into a dataframe (no osc samples)

In [None]:
beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

# (selection name, containment suffix). The position in this list is the selection's
# block number in the flattened reco-bin layout computed at the end of this cell.
sels = [
    ("nueCC", "_FC"),
    ("nueCC", "_PC"),
    ("numuCC", "_FC"),
    ("numuCC", "_PC"),
    ("CCpi0", "_FC"),
    ("CCpi0", "_PC"),
    ("NCpi0", ""),
]

# (suffix of the roofile_obj file name, sample tag used inside the tree names)
filetypes = [
    ("intrinsic", "intnue"),
    ("nu_overlay", "overlaynumu"),
    ("appnue", "appnue"),
]

dfs = []

for beam in beams:
    for filetype_pair in filetypes:
        for run in runs:
            for horn in horns:

                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn

                if runhorn == "run3_FHC":
                    continue

                filename = f"roofile_obj_{beam}_{runhorn}_{filetype_pair[0]}.root"

                # Close each file once its trees have been copied into dataframes.
                with uproot.open(loc + filename) as f:

                    # Scale the simulated sample to the data POT. These lookups sit outside the
                    # try/except below on purpose: a missing POT entry is an error, not a
                    # "selection not present in this file" case.
                    # NOTE(review): "appnue" is scaled with the nu_overlay POT although pot_dic
                    # also holds "fullosc_overlay" entries — confirm this is the intended sample.
                    if filetype_pair[0] == "intrinsic":
                        mc_pot = pot_dic[f"{beam}_{runhorn}_intrinsic_nue"]
                    else:
                        mc_pot = pot_dic[f"{beam}_{runhorn}_nu_overlay"]
                    pot_weight = pot_dic[f"{beam}_{runhorn}_data"] / mc_pot

                    for sel in sels:
                        key = f"tree_{sel[0]}_from_{filetype_pair[1]}{sel[1]}"
                        try:
                            tree = f[key]
                        except KeyError:
                            # Best effort: this file has no tree for this selection.
                            continue

                        curr_df = pd.DataFrame()
                        for subkey in tree.keys():
                            curr_df[subkey] = tree[subkey].array(library="pd")

                        curr_df["beam"] = beam
                        curr_df["filetype"] = filetype_pair[0]
                        curr_df["runhorn"] = runhorn
                        curr_df["sel"] = sel[0] + sel[1]
                        curr_df["pot_weight"] = pot_weight
                        dfs.append(curr_df)

all_sig_df = pd.concat(dfs, ignore_index=True, axis=0)


# 25 uniform bins from 0 to 2500 plus one wide last bin reaching 1e9: 26 bins in total.
energy_bins = np.concatenate([np.linspace(0, 2500, 26), [1e9]])
energy_bin_centers = (energy_bins[:-1] + energy_bins[1:]) / 2

all_sig_df["non_osc_weight"] = all_sig_df["pot_weight"] * all_sig_df["e2e_weight_xs"]

# .copy() makes the column subset an independent frame, so the assignments below
# cannot trigger pandas' SettingWithCopy behaviour.
all_sig_df = all_sig_df[["beam", "runhorn", "filetype", "sel", "e2e_pdg", "e2e_Etrue", "e2e_Ereco", "e2e_weight_xs", "e2e_baseline", "non_osc_weight"]].copy()

# Starting oscillation weights: 1 for every sample except "appnue", which starts at 0.
all_sig_df["osc_weight"] = 1
all_sig_df.loc[all_sig_df["filetype"] == "appnue", "osc_weight"] = 0

# NOTE(review): an e2e_Ereco below 0 or at/above 1e9 gives an index outside 0..25 and would
# land in a neighbouring block of the flattened layout — confirm such values cannot occur.
all_sig_df["Ereco_bin_index"] = np.digitize(all_sig_df["e2e_Ereco"], bins=energy_bins) - 1

# Flattened reco-bin layout: [BNB | NuMI], each beam holding 7 selections x 26 energy bins.
all_sig_df["beam_offset"] = np.where(all_sig_df["beam"] == "BNB", 0, 26*7)

# Derive the per-selection block number from `sels` itself (nueCC_FC -> 0, ..., NCpi0 -> 6)
# instead of a second hand-written table, and leave the `sels` configuration list intact.
sel_offset_by_name = {name + suffix: offset for offset, (name, suffix) in enumerate(sels)}
sel_offsets = all_sig_df["sel"].map(sel_offset_by_name)
if sel_offsets.isna().any():
    invalid_sel = all_sig_df.loc[sel_offsets.isna(), "sel"].iloc[0]
    raise ValueError(f"Invalid sel: {invalid_sel}!")
all_sig_df["sel_offset"] = sel_offsets.astype(int)

all_sig_df["reco_bin"] = all_sig_df["beam_offset"] + all_sig_df["sel_offset"] * 26 + all_sig_df["Ereco_bin_index"]
all_sig_df

In [None]:
# Split the signal events into the four oscillation channels, each ordered by reco bin.
# The "appnue" frames keep their original names (nue_to_numu / antinue_to_antinumu) because
# other cells refer to them; downstream they are oscillated as numu -> nue (14 -> 12) and
# antinumu -> antinue (-14 -> -12).
nue_disapp_df = all_sig_df.query("filetype=='intrinsic' or (filetype=='nu_overlay' and abs(e2e_pdg)==12)").sort_values(by="reco_bin")
numu_disapp_df = all_sig_df.query("filetype=='nu_overlay' and abs(e2e_pdg)==14").sort_values(by="reco_bin")
nue_to_numu_df = all_sig_df.query("filetype=='appnue' and e2e_pdg==12").sort_values(by="reco_bin")
antinue_to_antinumu_df = all_sig_df.query("filetype=='appnue' and e2e_pdg==-12").sort_values(by="reco_bin")

# Column order is relied on by position elsewhere: 0 = Etrue, 1 = baseline,
# 2 = non-oscillated weight, 3 = oscillation weight.
osc_input_columns = ["e2e_Etrue", "e2e_baseline", "non_osc_weight", "osc_weight"]

nue_disapp_arr = nue_disapp_df[osc_input_columns].to_numpy()
numu_disapp_arr = numu_disapp_df[osc_input_columns].to_numpy()
numu_to_nue_arr = nue_to_numu_df[osc_input_columns].to_numpy()
antinumu_to_antinue_arr = antinue_to_antinumu_df[osc_input_columns].to_numpy()

# Take the column directly so the index array is always 1-D. Squeezing an (n, 1) array
# would collapse a channel with exactly one event to a 0-d array.
nue_disapp_reco_idx = nue_disapp_df["reco_bin"].to_numpy()
numu_disapp_reco_idx = numu_disapp_df["reco_bin"].to_numpy()
nue_to_numu_reco_idx = nue_to_numu_df["reco_bin"].to_numpy()
antinue_to_antinumu_reco_idx = antinue_to_antinumu_df["reco_bin"].to_numpy()


In [None]:
def get_hist_from_arr(reco_idx, arr):
    """Accumulate per-event weights into the flattened reco-bin histogram.

    Parameters
    ----------
    reco_idx : integer array
        Target bin of each event in the 26*7*2-bin histogram.
    arr : 2-D array
        One row per event; column 2 is multiplied by column 3 to form the event weight.

    Returns
    -------
    numpy.ndarray
        Array of length 26*7*2 holding the summed weights per bin.
    """
    n_reco_bins = 26*7*2
    event_weights = arr[:, 2] * arr[:, 3]

    summed = np.zeros(n_reco_bins)
    # Unbuffered add: events that share a bin index all contribute.
    np.add.at(summed, reco_idx, event_weights)
    return summed


# Loading data and background histograms from hist_rootfiles into arrays

In [None]:
# Maps "<beam>_<runhorn>_<filetype>" to the concatenated bin contents of that file's
# seven selection histograms.
data_bkg_hists = {}

beams = [
    "BNB",
    "NuMI",
]

runs = [
    "run1",
    "run2",
    "run3",
]

horns = [
    "FHC",
    "RHC",
]

filetypes = [
    "data",
    "ext",
    "dirt",
]

# Histogram-name prefixes, one per selection, in the order they are concatenated.
# Defined once up front so every filetype uses it regardless of the loop order
# (non-data files use the same names prefixed with "BG_").
start_list = [
    "nueCC_FC",
    "nueCC_PC",
    "numuCC_nopi0_nonueCC_FC",
    "numuCC_nopi0_nonueCC_PC",
    "CCpi0_nonueCC_FC",
    "CCpi0_nonueCC_PC",
    "NCpi0_nonueCC",
]

for filetype in filetypes:
    for beam in beams:
        for run in runs:
            for horn in horns:

                if beam == "BNB":
                    if horn == "RHC":
                        continue # no BNB RHC files
                    runhorn = run
                else:
                    runhorn = run + "_" + horn
                if runhorn == "run3_FHC":
                    continue

                if filetype == "data":
                    filename = f"{runhorn}_data_{beam}.root"
                elif filetype == "ext":
                    filename = f"checkout_data_ext{beam}_{runhorn}.root"
                elif filetype == "dirt":
                    if beam == "BNB":
                        filename = f"checkout_prodgenie_dirt_overlay_{runhorn}.root"
                    else:
                        filename = f"checkout_prodgenie_{runhorn.lower()}_dirt.root"
                else:
                    # Fail loudly instead of silently reusing the previous iteration's filename.
                    raise ValueError(f"Invalid filetype: {filetype} for beam: {beam} and runhorn: {runhorn}")

                # Close the file once its histograms have been copied out.
                with uproot.open(loc + filename) as f:
                    keys = f.keys()

                    # For each selection keep the first key that starts with its prefix.
                    filtered_keys = []
                    for start in start_list:
                        prefix = start if filetype == "data" else "BG_" + start
                        for key in keys:
                            if key.startswith(prefix):
                                filtered_keys.append(key)
                                break

                    arr_count = len(filtered_keys)
                    assert arr_count == 7, f"Expected to find 7 arrays, found {arr_count}!"

                    # values(flow=True) includes the flow bins; [1:] drops the first entry
                    # (the underflow bin) and keeps the rest, overflow included. The leading
                    # empty array keeps the result's dtype float, as before.
                    total_arr = np.concatenate(
                        [np.array([])] + [np.array(f[key].values(flow=True)[1:]) for key in filtered_keys]
                    )

                data_bkg_hists[f"{beam}_{runhorn}_{filetype}"] = total_arr


for k, v in data_bkg_hists.items():
    print(k, len(v))


## Combining files

In [None]:
# Sum the per-run histograms of every filetype into a single array laid out as
# [BNB block | NuMI block], each block holding 26*7 bins.
n_bins_per_beam = 26*7

# Collect every (beam, runhorn) combination that is loaded elsewhere in the notebook.
beam_runhorn_pairs = []
for beam in beams:
    for run in runs:
        for horn in horns:
            if beam == "BNB" and horn == "RHC":
                continue # no BNB RHC files
            runhorn = run if beam == "BNB" else run + "_" + horn
            if runhorn != "run3_FHC":
                beam_runhorn_pairs.append((beam, runhorn))

data_bkg_hists_combined = {}
for filetype in filetypes:
    curr_arr = np.zeros(2 * n_bins_per_beam)
    for beam, runhorn in beam_runhorn_pairs:
        # BNB fills the first half of the array, any other beam the second half.
        if beam == "BNB":
            beam_block = slice(None, n_bins_per_beam)
        else:
            beam_block = slice(n_bins_per_beam, None)
        curr_arr[beam_block] += data_bkg_hists[f"{beam}_{runhorn}_{filetype}"]
    data_bkg_hists_combined[filetype] = curr_arr

print("")
for name, combined in data_bkg_hists_combined.items():
    print(name, len(combined))


# Nominal Prediction Histogram

In [None]:
def plot_nue_numu_hists(arrs):
    """Plot stacked predictions against data for the nueCC/numuCC FC and PC channels of both beams.

    Parameters
    ----------
    arrs : sequence of four 2-D arrays
        ``(nue_disapp_arr, numu_disapp_arr, numu_to_nue_arr, antinumu_to_antinue_arr)``.
        Each is histogrammed with ``get_hist_from_arr`` using the matching module-level
        ``*_reco_idx`` array.

    Notes
    -----
    Reads the module-level ``data_bkg_hists_combined``, ``energy_bins`` and
    ``energy_bin_centers``. Draws a 2x4 grid (top row BNB, bottom row NuMI), shows the
    figure and returns nothing. The panel titles are fixed strings.
    """
    nue_disapp_arr, numu_disapp_arr, numu_to_nue_arr, antinumu_to_antinue_arr = arrs

    # nue prediction = surviving nue + both appearance samples; numu prediction = surviving numu.
    all_nue_sig_hist = (
        get_hist_from_arr(nue_disapp_reco_idx, nue_disapp_arr)
        + get_hist_from_arr(nue_to_numu_reco_idx, numu_to_nue_arr)
        + get_hist_from_arr(antinue_to_antinumu_reco_idx, antinumu_to_antinue_arr)
    )
    all_numu_sig_hist = get_hist_from_arr(numu_disapp_reco_idx, numu_disapp_arr)

    # Histogram, colour and legend label of each signal component.
    signal_components = {
        "nue": (all_nue_sig_hist, "g", "Nue"),
        "numu": (all_numu_sig_hist, "b", "Numu"),
    }

    # One entry per panel, in subplot order:
    # (26-bin block number in the flattened layout, title, signal stacking order, draw legend?)
    panels = [
        (0, "BNB nueCC FC, no Osc", ("numu", "nue"), False),
        (1, "BNB nueCC PC, no osc", ("numu", "nue"), False),
        (2, "BNB numuCC FC, no osc", ("nue", "numu"), False),
        (3, "BNB numuCC PC, no osc", ("nue", "numu"), True),
        (7, "NuMI nueCC FC, no osc", ("numu", "nue"), False),
        (8, "NuMI nueCC PC, no osc", ("numu", "nue"), False),
        (9, "NuMI numuCC FC, no osc", ("nue", "numu"), False),
        (10, "NuMI numuCC PC, no osc", ("nue", "numu"), False),
    ]

    ext_hist = data_bkg_hists_combined["ext"]
    dirt_hist = data_bkg_hists_combined["dirt"]
    data_hist = data_bkg_hists_combined["data"]

    plt.figure(figsize=(15, 8))

    for panel_number, (block, title, signal_order, with_legend) in enumerate(panels, start=1):
        block_bins = slice(26*block, 26*(block + 1))
        lower_sig, upper_sig = (signal_components[name] for name in signal_order)

        plt.subplot(2, 4, panel_number)
        plt.hist(
            [energy_bin_centers] * 4,
            weights=[ext_hist[block_bins], dirt_hist[block_bins], lower_sig[0][block_bins], upper_sig[0][block_bins]],
            bins=energy_bins[:26],
            stacked=True,
            color=["grey", "brown", lower_sig[1], upper_sig[1]],
            label=["Ext", "Dirt", lower_sig[2], upper_sig[2]],
        )
        plt.errorbar(
            energy_bin_centers,
            data_hist[block_bins],
            yerr=np.sqrt(data_hist[block_bins]),
            fmt=".",
            color="k",
            label="Data",
        )
        plt.xlim(0, 2600)
        if with_legend:
            plt.legend()
        plt.title(title)

    plt.show()


# 3+2 Oscillation

In [None]:
def get_3_plus_2_prob(alpha, beta, E, L, osc_params):
    """Short-baseline 3+2 sterile-neutrino oscillation probability for ``alpha -> beta``.

    Parameters
    ----------
    alpha, beta : int
        Initial and final flavor as PDG codes: 12 (nue), 14 (numu), -12 (antinue), -14 (antinumu).
        Implemented channels: numu -> nue, antinumu -> antinue, and survival of any of the
        four flavors (alpha == beta).
    E, L : scalar or array
        Energy and baseline. The formulas were written for E in GeV and L in km; only the
        ratio L / E enters. Must be strictly positive; empty arrays are allowed.
    osc_params : sequence of 7 numbers
        ``(m4, m5, eta, Ue4, Umu4, Ue5, Umu5)``: sterile masses in eV with 0 < m4 < m5, a
        phase eta, and the magnitudes (between 0 and 1) of the extended-PMNS elements.

    Returns
    -------
    scalar or numpy.ndarray
        Probability with the broadcast shape of ``L / E``.

    Raises
    ------
    AssertionError
        If a flavor code, mass, energy, baseline or mixing magnitude is out of range.
    ValueError
        If the requested channel is not implemented (including mixed
        neutrino/antineutrino transitions such as 14 -> -12).
    """

    m4, m5, eta, Ue4, Umu4, Ue5, Umu5 = osc_params

    # all 5x5 complex PMNS matrices have 50 parameters
    # only considering the last two columns for sterile mixings, this is 9 parameters
    # with unitarity constraints, this becomes 7 parameters: theta and delta for each of 14, 24, 34, 15, 25, 35, and 45
    # from https://arxiv.org/pdf/1107.1452, for nue->nue, numu->numu, and numu->nue, we can reduce that to just five
    # parameters: magnitudes of Ue4, Umu4, Ue5, and Umu5, and a phase eta
    # We also still need to require small Ue4, Umu4, Ue5, and Umu5, in order to avoid unitarity violation, but no
    # strict cut is applied here, since it depends on all the measured mixing parameters, which are complicated
    # with large uncertainties.
    # m1, m2, and m3 are approximated as zero.

    # E and L are arrays (no if statements on these)

    for flavor in [alpha, beta]:
        assert flavor in [-12, 12, 14, -14], f"Invalid flavor: {flavor}!"
    assert 0 < m4 < m5, f"Invalid masses: {m4} and {m5}!"
    # np.all (rather than np.min) also accepts empty event arrays.
    assert np.all(np.asarray(E) > 0), f"Invalid energy: {E}!"
    assert np.all(np.asarray(L) > 0), f"Invalid length: {L}!"
    for U_elem in [Ue4, Umu4, Ue5, Umu5]:
        assert 0 <= U_elem <= 1, f"Invalid U element: {U_elem}!"

    phi_41 = 1.27 * m4**2 * L / E
    phi_51 = 1.27 * m5**2 * L / E
    phi_54 = 1.27 * (m5**2 - m4**2) * L / E

    same_particle_type = (alpha > 0) == (beta > 0)

    # numu -> nue or antinumu -> antinue; a neutrino never turns into an antineutrino here
    if abs(alpha) == 14 and abs(beta) == 12 and same_particle_type:
        # The phase enters with opposite sign for neutrinos and antineutrinos.
        cp_sign = 1 if alpha == -14 else -1

        P = 4 * Umu4**2 * Ue4**2 * np.sin(phi_41)**2
        P += 4 * Umu5**2 * Ue5**2 * np.sin(phi_51)**2
        P += 8 * Umu4 * Ue4 * Umu5 * Ue5 * np.sin(phi_41) * np.sin(phi_51) * np.cos(phi_54 + cp_sign * eta)

        return P

    if alpha == beta: # nu_alpha -> nu_alpha
        # The survival formula contains no phase, so it is evaluated identically for
        # neutrinos and antineutrinos; only the flavor selects the mixing elements.
        U4, U5 = (Ue4, Ue5) if abs(alpha) == 12 else (Umu4, Umu5)

        P = 1 - 4 * (1 - U4**2 - U5**2) * (U4**2 * np.sin(phi_41)**2 + U5**2 * np.sin(phi_51)**2)
        P -= 4 * U4**2 * U5**2 * np.sin(phi_54)**2

        return P

    raise ValueError(f"{alpha} -> {beta} oscillation not implemented!")

def oscillate_arr(arr, alpha, beta, osc_params):
    """Overwrite column 3 of ``arr`` in place with the ``alpha -> beta`` oscillation probability.

    Columns 0 and 1 of ``arr`` are passed to ``get_3_plus_2_prob`` as E and L. Column 2
    (the non-oscillated weight) is left untouched. Returns nothing.
    """
    energies, baselines = arr[:, 0], arr[:, 1]
    # Column 3 is the oscillation weight; it is replaced for every row.
    arr[:, 3] = get_3_plus_2_prob(alpha, beta, energies, baselines, osc_params)

def oscillate_arrs(arrs, osc_params):
    """Apply ``oscillate_arr`` in place to the four channel arrays with their flavor pairs.

    ``arrs`` must unpack into exactly four arrays, in the order
    (nue disappearance, numu disappearance, numu -> nue, antinumu -> antinue).
    """
    nue_disapp_arr, numu_disapp_arr, numu_to_nue_arr, antinumu_to_antinue_arr = arrs

    # (array, initial flavor, final flavor) as PDG codes
    channels = (
        (nue_disapp_arr, 12, 12),
        (numu_disapp_arr, 14, 14),
        (numu_to_nue_arr, 14, 12),
        (antinumu_to_antinue_arr, -14, -12),
    )
    for channel_arr, initial_flavor, final_flavor in channels:
        oscillate_arr(channel_arr, initial_flavor, final_flavor, osc_params)


In [None]:
# Channel arrays in the order expected by plot_nue_numu_hists and oscillate_arrs.
arrs = [
    nue_disapp_arr,
    numu_disapp_arr,
    numu_to_nue_arr,
    antinumu_to_antinue_arr,
]

plot_nue_numu_hists(arrs)

In [None]:
# Example parameter point. With Ue5 = Umu5 = 0, every term that carries a
# fifth-state mixing element vanishes from the probabilities.
m4 = 1
m5 = 1000
eta = 0
Ue4 = 0.5
Umu4 = 0.1
Ue5 = 0
Umu5 = 0

osc_params = [m4, m5, eta, Ue4, Umu4, Ue5, Umu5]

# oscillate_arrs overwrites the oscillation-weight column in place. Work on copies so that
# `arrs` keeps its nominal weights and the nominal plot cell stays reproducible when re-run.
osc_arrs = [channel_arr.copy() for channel_arr in arrs]
oscillate_arrs(osc_arrs, osc_params)

plot_nue_numu_hists(osc_arrs)
