In [None]:
import numpy as np
import uproot as uproot
import uproot3 as uproot3
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

from tqdm.notebook import tqdm

import pickle


In [None]:
# trio refers to the fact that I'm constraining M_A, NormCCMEC, and Lambda all at the same time


In [None]:
# Data files:
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_reproduction/wiener_svd/merge_xs_data.root
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_reproduction/wiener_svd/wiener_data.root

# Asimov files:
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_reproduction/wiener_svd/merge_xs_asimov.root # THIS FILE DOESN'T EXIST, but it should be the same prediction histograms as real data basically
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_reproduction/wiener_svd/wiener_asimov.root

# NuWro Fake Data files:
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_fakedata_nuwro/wiener_svd/merge_xs.root
# /exp/uboone/data/users/lcoopert/LEE/LEEana_xs_3D_fakedata_nuwro/wiener_svd/wiener_all.root

#f_merged_asimov = uproot.open("numuCC_3d_data/real_data/merge_xs_data.root") # only using predictions from this file, so it's treated as Asimov
#f_wiener_asimov = uproot.open("numuCC_3d_data/asimov_data/wiener_asimov.root")

# commenting this out just so we don't accidentally use it
#f_merged_data = uproot.open("numuCC_3d_data/real_data/merge_xs_data.root")
#f_wiener_data = uproot.open("numuCC_3d_data/real_data/wiener_data.root")

# Fake-data inputs, opened eagerly with uproot. The real-data files above are
# deliberately left commented out, so f_merged_data / f_wiener_data do not exist.
f_merged_fake_nuwro = uproot.open("numuCC_3d_data/nuwro_fake_data/merge_xs.root")
f_wiener_fake_nuwro = uproot.open("numuCC_3d_data/nuwro_fake_data/wiener_all.root")

f_merged_fake_genie_v2 = uproot.open("numuCC_3d_data/genie_v2_fake_data/merge_xs.root")
f_wiener_fake_genie_v2 = uproot.open("numuCC_3d_data/genie_v2_fake_data/wiener.root")

# NOTE(review): not referenced in the part of the notebook visible here.
skip_AxFFCCQEshape_UBGenie = False

# Dataset selection: everything is switched off first, then one of the three
# lines below is un-commented to pick the dataset for this run.
use_real_data = False
use_nuwro_fake_data = False
use_genie_v2_fake_data = False

#use_real_data = True
#use_nuwro_fake_data = True
use_genie_v2_fake_data = True

# NOTE(review): collapse_*, shape_type and regenerate_universes are presumably
# consumed by later cells; they are not used in the cells visible here.
collapse_2d = False
collapse_1d = True

shape_type = "rate+shape"
#shape_type = "+100"
#shape_type = "matrix_breakdown"

regenerate_universes = True

# Blinding guard: stops the notebook before any real-data branch can run.
assert not use_real_data, "Not allowed to unblind yet!"

# Bind the generic handles used by the rest of the notebook. The first flag
# that is set wins if several are enabled.
if use_real_data:
    # Unreachable while the assert above is in place; the names below only
    # exist once the commented-out real-data uproot.open lines are restored.
    f_merged = f_merged_data
    f_wiener = f_wiener_data
elif use_nuwro_fake_data:
    f_merged = f_merged_fake_nuwro
    f_wiener = f_wiener_fake_nuwro
elif use_genie_v2_fake_data:
    f_merged = f_merged_fake_genie_v2
    f_wiener = f_wiener_fake_genie_v2
else:
    # Fail here instead of with a NameError on f_merged many cells later.
    raise ValueError(
        "No dataset selected: set one of use_real_data, "
        "use_nuwro_fake_data or use_genie_v2_fake_data to True"
    )


In [None]:
# Branch lists passed to uproot3's `.pandas.df(...)` when the ntuples are read
# in the loading cells below. One list per tree of the "wcpselection" directory.

# Branches read from T_BDTvars (for every sample, including EXT).
bdt_vars = [
    "nue_score",
    "numu_score",
    "numu_cc_flag"
]

# Branches read from T_eval for the overlay and dirt samples.
eval_vars = [
    "run",
    "subrun",
    "event",
    "truth_nuEnergy",
    "truth_nuPdg",
    "truth_isCC",
    "truth_vtxInside",
    "match_isFC",
    "match_completeness_energy",
    "truth_energyInside",

    "weight_cv",
    "weight_spline",
]

# Branches read from T_eval for the EXT files; only the containment flag is
# requested there (no truth_* or weight_* branches).
eval_data_vars = [
    "match_isFC",
]

# Branches read from T_KINEvars.
kine_vars = [
    "kine_reco_Enu",
]

# Branches read from T_PFeval for the overlay and dirt samples.
pf_vars = [
    "reco_muonMomentum",
    "truth_muonMomentum",
]

# Branches read from T_PFeval for the EXT files (reco only).
pf_data_vars = [
    "reco_muonMomentum",
]

# Branches read from T_weight (only for the UBGenieFluxSmallUni.root files).
weight_vars = [
    # the framework never uses these, it uses the ones in T_eval instead!
    #"weight_cv",
    #"weight_spline",

    "All_UBGenie",
    
    "AxFFCCQEshape_UBGenie",
    "DecayAngMEC_UBGenie",
    "NormCCCOH_UBGenie",
    "NormNCCOH_UBGenie",
    "RPA_CCQE_UBGenie",
    "ThetaDelta2NRad_UBGenie",
    "Theta_Delta2Npi_UBGenie",
    "VecFFCCQEshape_UBGenie",
    "XSecShape_CCMEC_UBGenie",
    "xsr_scc_Fa3_SCC",
    "xsr_scc_Fv3_SCC",
]

# Directory prefix prepended to every ntuple file name opened with uproot3.
# NOTE(review): absolute, machine-specific path; edit when running elsewhere.
#loc = "/Users/leehagaman/data/processed_checkout_rootfiles/"
loc = "/Users/leehagaman/data/from_london/"


In [None]:
def _read_nu_overlay_with_weights(rootfile_path, file_label):
    """Read one nu-overlay ntuple that carries the T_weight tree.

    Opens the "wcpselection" directory of `rootfile_path`, reads the requested
    branches of T_BDTvars, T_eval, T_KINEvars, T_PFeval and T_weight, and joins
    them column-wise into one DataFrame tagged with `file_label` in a "file"
    column.

    Returns:
        (events, total_pot): the joined DataFrame and the sum of the
        "pot_tor875good" branch of T_pot.
    """
    wcp_dir = uproot3.open(rootfile_path)["wcpselection"]
    tree_branches = [
        ("T_BDTvars", bdt_vars),
        ("T_eval", eval_vars),
        ("T_KINEvars", kine_vars),
        ("T_PFeval", pf_vars),
        ("T_weight", weight_vars),
    ]
    # flatten=False keeps array-valued branches as one entry per event.
    per_tree = [wcp_dir[tree].pandas.df(branches, flatten=False)
                for tree, branches in tree_branches]
    pot_frame = wcp_dir["T_pot"].pandas.df("pot_tor875good", flatten=False)
    total_pot = np.sum(pot_frame["pot_tor875good"].to_numpy())
    events = pd.concat(per_tree, axis=1, sort=False)
    events["file"] = file_label
    return events, total_pot


nu_overlay_run1_df, nu_overlay_run1_vars_pot = _read_nu_overlay_with_weights(
    loc + "prodgenie_bnb_nu_overlay_run1/UBGenieFluxSmallUni.root", "nu_overlay_run1")
nu_overlay_run2_df, nu_overlay_run2_vars_pot = _read_nu_overlay_with_weights(
    loc + "prodgenie_bnb_nu_overlay_run2/UBGenieFluxSmallUni.root", "nu_overlay_run2")
nu_overlay_run3_df, nu_overlay_run3_vars_pot = _read_nu_overlay_with_weights(
    loc + "prodgenie_bnb_nu_overlay_run3/UBGenieFluxSmallUni.root", "nu_overlay_run3")

for run_frame in (nu_overlay_run1_df, nu_overlay_run2_df, nu_overlay_run3_df):
    print(run_frame.shape)

# Stack the three runs row-wise (the original per-file index is kept).
nu_overlay_vars_df = pd.concat(
    [nu_overlay_run1_df, nu_overlay_run2_df, nu_overlay_run3_df],
    sort=False,
)


In [None]:
def _read_nu_overlay_checkout(rootfile_path, file_label):
    """Read one nu-overlay checkout ntuple (no T_weight tree is read).

    Joins the requested branches of T_BDTvars, T_eval, T_KINEvars and T_PFeval
    column-wise and tags every row with `file_label` in a "file" column.

    Returns:
        (events, total_pot): the joined DataFrame and the sum of the
        "pot_tor875good" branch of T_pot.
    """
    wcp_dir = uproot3.open(rootfile_path)["wcpselection"]
    tree_branches = [
        ("T_BDTvars", bdt_vars),
        ("T_eval", eval_vars),
        ("T_KINEvars", kine_vars),
        ("T_PFeval", pf_vars),
    ]
    per_tree = [wcp_dir[tree].pandas.df(branches, flatten=False)
                for tree, branches in tree_branches]
    pot_frame = wcp_dir["T_pot"].pandas.df("pot_tor875good", flatten=False)
    total_pot = np.sum(pot_frame["pot_tor875good"].to_numpy())
    events = pd.concat(per_tree, axis=1, sort=False)
    events["file"] = file_label
    return events, total_pot


# These rebind nu_overlay_runN_df, which the previous cell used for the
# frames that also carry the T_weight columns.
nu_overlay_run1_df, nu_overlay_run1_pot = _read_nu_overlay_checkout(
    loc + "checkout_prodgenie_bnb_nu_overlay_run1.root", "nu_overlay_run1")
nu_overlay_run2_df, nu_overlay_run2_pot = _read_nu_overlay_checkout(
    loc + "checkout_prodgenie_bnb_nu_overlay_run2.root", "nu_overlay_run2")
nu_overlay_run3_df, nu_overlay_run3_pot = _read_nu_overlay_checkout(
    loc + "checkout_prodgenie_bnb_nu_overlay_run3.root", "nu_overlay_run3")

for run_frame in (nu_overlay_run1_df, nu_overlay_run2_df, nu_overlay_run3_df):
    print(run_frame.shape)

nu_overlay_df = pd.concat(
    [nu_overlay_run1_df, nu_overlay_run2_df, nu_overlay_run3_df],
    sort=False,
)


In [None]:
# Map each unisim weight branch to the number of weights stored per event,
# measured on the first row of nu_overlay_vars_df.
# NOTE(review): assumes every event carries the same number of weights as the
# first one - confirm.
unisim_branch_names = (
    "AxFFCCQEshape_UBGenie",
    "DecayAngMEC_UBGenie",
    "NormCCCOH_UBGenie",
    "NormNCCOH_UBGenie",
    "RPA_CCQE_UBGenie",
    "ThetaDelta2NRad_UBGenie",
    "Theta_Delta2Npi_UBGenie",
    "VecFFCCQEshape_UBGenie",
    "XSecShape_CCMEC_UBGenie",
    "xsr_scc_Fa3_SCC",
    "xsr_scc_Fv3_SCC",
)

num_unisim_variations_dic = {}
for unisim_type in unisim_branch_names:
    first_event_weights = nu_overlay_vars_df[unisim_type].to_numpy()[0]
    num_unisim_variations_dic[unisim_type] = len(first_event_weights)


In [None]:
def _read_dirt_with_weights(rootfile_path, file_label):
    """Read one dirt-overlay ntuple that carries the T_weight tree.

    Joins the requested branches of T_BDTvars, T_eval, T_KINEvars, T_PFeval and
    T_weight column-wise and tags every row with `file_label` in a "file"
    column.

    Returns:
        (events, total_pot): the joined DataFrame and the sum of the
        "pot_tor875good" branch of T_pot.
    """
    wcp_dir = uproot3.open(rootfile_path)["wcpselection"]
    tree_branches = [
        ("T_BDTvars", bdt_vars),
        ("T_eval", eval_vars),
        ("T_KINEvars", kine_vars),
        ("T_PFeval", pf_vars),
        ("T_weight", weight_vars),
    ]
    # flatten=False keeps array-valued branches as one entry per event.
    per_tree = [wcp_dir[tree].pandas.df(branches, flatten=False)
                for tree, branches in tree_branches]
    pot_frame = wcp_dir["T_pot"].pandas.df("pot_tor875good", flatten=False)
    total_pot = np.sum(pot_frame["pot_tor875good"].to_numpy())
    events = pd.concat(per_tree, axis=1, sort=False)
    events["file"] = file_label
    return events, total_pot


dirt_run1_df, dirt_run1_vars_pot = _read_dirt_with_weights(
    loc + "prodgenie_dirt_overlay_run1_all/UBGenieFluxSmallUni.root", "dirt_run1")
dirt_run2_df, dirt_run2_vars_pot = _read_dirt_with_weights(
    loc + "prodgenie_dirt_overlay_run2_all/UBGenieFluxSmallUni.root", "dirt_run2")
dirt_run3_df, dirt_run3_vars_pot = _read_dirt_with_weights(
    loc + "prodgenie_dirt_overlay_run3_all/UBGenieFluxSmallUni.root", "dirt_run3")

for run_frame in (dirt_run1_df, dirt_run2_df, dirt_run3_df):
    print(run_frame.shape)

dirt_vars_df = pd.concat(
    [dirt_run1_df, dirt_run2_df, dirt_run3_df],
    sort=False,
)


In [None]:
def _read_dirt_checkout(rootfile_path, file_label):
    """Read one dirt-overlay checkout ntuple (no T_weight tree is read).

    Joins the requested branches of T_BDTvars, T_eval, T_KINEvars and T_PFeval
    column-wise and tags every row with `file_label` in a "file" column.

    Returns:
        (events, total_pot): the joined DataFrame and the sum of the
        "pot_tor875good" branch of T_pot.
    """
    wcp_dir = uproot3.open(rootfile_path)["wcpselection"]
    tree_branches = [
        ("T_BDTvars", bdt_vars),
        ("T_eval", eval_vars),
        ("T_KINEvars", kine_vars),
        ("T_PFeval", pf_vars),
    ]
    per_tree = [wcp_dir[tree].pandas.df(branches, flatten=False)
                for tree, branches in tree_branches]
    pot_frame = wcp_dir["T_pot"].pandas.df("pot_tor875good", flatten=False)
    total_pot = np.sum(pot_frame["pot_tor875good"].to_numpy())
    events = pd.concat(per_tree, axis=1, sort=False)
    events["file"] = file_label
    return events, total_pot


# These rebind dirt_runN_df, which the previous cell used for the frames that
# also carry the T_weight columns.
dirt_run1_df, dirt_run1_pot = _read_dirt_checkout(
    loc + "checkout_prodgenie_dirt_overlay_run1_all.root", "dirt_run1")
dirt_run2_df, dirt_run2_pot = _read_dirt_checkout(
    loc + "checkout_prodgenie_dirt_overlay_run2_all.root", "dirt_run2")
dirt_run3_df, dirt_run3_pot = _read_dirt_checkout(
    loc + "checkout_prodgenie_dirt_overlay_run3_all.root", "dirt_run3")

for run_frame in (dirt_run1_df, dirt_run2_df, dirt_run3_df):
    print(run_frame.shape)

dirt_df = pd.concat(
    [dirt_run1_df, dirt_run2_df, dirt_run3_df],
    sort=False,
)


In [None]:
def _read_ext_frame(rootfile_path, file_label):
    """Read one EXT ntuple into a single per-event DataFrame.

    Only the data-side branch lists are used (eval_data_vars, pf_data_vars);
    no T_weight tree and no POT are read. Every row is tagged with `file_label`
    in a "file" column.
    """
    wcp_dir = uproot3.open(rootfile_path)["wcpselection"]
    tree_branches = [
        ("T_BDTvars", bdt_vars),
        ("T_eval", eval_data_vars),
        ("T_KINEvars", kine_vars),
        ("T_PFeval", pf_data_vars),
    ]
    per_tree = [wcp_dir[tree].pandas.df(branches, flatten=False)
                for tree, branches in tree_branches]
    events = pd.concat(per_tree, axis=1, sort=False)
    events["file"] = file_label
    return events


ext_run1_df = _read_ext_frame(loc + "wcp_data_extbnb_run1_mcc9_v08_00_00_53_checkout.root", "ext_run1")
ext_run2_df = _read_ext_frame(loc + "wcp_data_extbnb_run2_mcc9_v08_00_00_53_checkout.root", "ext_run2")
ext_run3_df = _read_ext_frame(loc + "wcp_data_extbnb_run3_mcc9_v08_00_00_53_checkout.root", "ext_run3")

for run_frame in (ext_run1_df, ext_run2_df, ext_run3_df):
    print(run_frame.shape)

ext_df = pd.concat([ext_run1_df, ext_run2_df, ext_run3_df], sort=False)

print("concatenated ext files")

# The EXT files are read without any weight branches, so fill the weight
# columns with ones to give ext_df the same columns as the simulated samples.
n_ext_events = ext_df.shape[0]
ext_df["weight_cv"] = [1.] * n_ext_events
ext_df["weight_spline"] = [1.] * n_ext_events
print("added first ext weight values")

# NOTE(review): 600 is hard-coded; presumably the number of All_UBGenie
# universes per event - confirm against the T_weight branch.
six_hundred_ones_arr = np.ones(600)
# Every row refers to the same array object, exactly as before.
All_UBGenie_ones_arr = [six_hundred_ones_arr] * n_ext_events
print("made ones array")
ext_df["All_UBGenie"] = All_UBGenie_ones_arr

print("added All_UBGenie ext weight values")

ext_unisim_names = (
    "AxFFCCQEshape_UBGenie",
    "DecayAngMEC_UBGenie",
    "NormCCCOH_UBGenie",
    "NormNCCOH_UBGenie",
    "RPA_CCQE_UBGenie",
    "ThetaDelta2NRad_UBGenie",
    "Theta_Delta2Npi_UBGenie",
    "VecFFCCQEshape_UBGenie",
    "XSecShape_CCMEC_UBGenie",
    "xsr_scc_Fa3_SCC",
    "xsr_scc_Fv3_SCC",
)

for unisim_type in ext_unisim_names:

    print("adding ext weight values for unisim:", unisim_type)

    # One fresh all-ones array per event, sized like the simulated sample's
    # weight vector for this unisim.
    n_variations = num_unisim_variations_dic[unisim_type]
    ext_df[unisim_type] = [np.ones(n_variations) for _row in range(n_ext_events)]


In [None]:
# all_df: checkout overlay + dirt + EXT events (central-value sample).
# all_vars_df: overlay + dirt events from the files that also carry the
# T_weight columns; EXT is not included here.
# Rows are stacked without resetting the index, so index values repeat.
all_df = pd.concat([nu_overlay_df, dirt_df, ext_df], sort=False)
all_vars_df = pd.concat([nu_overlay_vars_df, dirt_vars_df], sort=False)


In [None]:
# Derive reco_costheta and reco_muon_momentum for both event tables from the
# four reco_muonMomentum[0..3] columns. Components 0-2 are used as the spatial
# momentum and component 3 as the energy.
# NOTE(review): the *1000 and the 105.66 constant suggest energy in GeV and a
# muon mass in MeV - confirm units.
for kinematics_df in (all_df, all_vars_df):
    reco_muonmomentum_x = kinematics_df["reco_muonMomentum[0]"].to_numpy()
    reco_muonmomentum_y = kinematics_df["reco_muonMomentum[1]"].to_numpy()
    reco_muonmomentum_z = kinematics_df["reco_muonMomentum[2]"].to_numpy()
    reco_muonmomentum_t = kinematics_df["reco_muonMomentum[3]"].to_numpy()

    costheta_vals = []
    muonmomentum_vals = []
    # Deliberately per-event scalar arithmetic (not vectorised) so the numeric
    # type promotion of every operation stays exactly as it was.
    for px, py, pz, energy in zip(reco_muonmomentum_x, reco_muonmomentum_y,
                                  reco_muonmomentum_z, reco_muonmomentum_t):
        if energy < 105.66 / 1000.: # surprising that this happens for positive values, but I did find some events
            # Energy below the mass constant: flag both quantities with -1.
            costheta_vals.append(-1)
            muonmomentum_vals.append(-1)
            continue
        costheta_vals.append(pz / np.sqrt(px**2 + py**2 + pz**2))
        muon_KE = energy * 1000. - 105.66
        # |p| = sqrt(KE^2 + 2*KE*m)
        muonmomentum_vals.append(np.sqrt(muon_KE**2 + 2 * muon_KE * 105.66))

    kinematics_df["reco_costheta"] = costheta_vals
    kinematics_df["reco_muon_momentum"] = muonmomentum_vals



In [None]:
# POT of the selected dataset, one entry per run (run1, run2, run3).
if use_real_data:
    data_pots = [
        1.42319e+20,
        2.5413e+20,
        2.40466e+20
    ]
elif use_nuwro_fake_data:
    data_pots = [
        0,
        2.98217e+20,
        3.12922e+20
    ]
elif use_genie_v2_fake_data:
    data_pots = [
        7.2432440e20,
        0.,
        0.
    ]
else:
    # Previously data_pots was simply left undefined in this case.
    raise ValueError(
        "No dataset selected: set one of use_real_data, "
        "use_nuwro_fake_data or use_genie_v2_fake_data to True"
    )

# Normalisation used for the EXT files, one entry per run.
ext_pots = [
    2.21814e+20,
    6.25014e+20,
    7.4127e+20,
]

# EXT and dirt only contribute for real data; for fake data their weights are 0.
if use_real_data:
    include_ext = True
    include_dirt = True
else:
    include_ext = False
    include_dirt = False

# file label -> (data POT of that run, POT of the sample, whether it contributes)
net_weight_normalisation = {
    "nu_overlay_run1": (data_pots[0], nu_overlay_run1_pot, True),
    "nu_overlay_run2": (data_pots[1], nu_overlay_run2_pot, True),
    "nu_overlay_run3": (data_pots[2], nu_overlay_run3_pot, True),
    "dirt_run1": (data_pots[0], dirt_run1_pot, include_dirt),
    "dirt_run2": (data_pots[1], dirt_run2_pot, include_dirt),
    "dirt_run3": (data_pots[2], dirt_run3_pot, include_dirt),
    "ext_run1": (data_pots[0], ext_pots[0], include_ext),
    "ext_run2": (data_pots[1], ext_pots[1], include_ext),
    "ext_run3": (data_pots[2], ext_pots[2], include_ext),
}

weight_cv_vals = all_df["weight_cv"].to_numpy()
weight_spline_vals = all_df["weight_spline"].to_numpy()
files = all_df["file"].to_numpy()
net_weight_vals = []
for w_cv, w_spline, file_label in zip(weight_cv_vals, weight_spline_vals, files):
    # Any cv weight outside (0, 30) - including NaN - is replaced by 1.
    if not (0 < w_cv < 30):
        w_cv = 1

    if file_label not in net_weight_normalisation:
        # The old if/elif ladder appended nothing for an unknown label, which
        # only surfaced as a length mismatch at the column assignment below.
        raise ValueError(f"unrecognised file label {file_label!r} in all_df")

    data_pot, sample_pot, contributes = net_weight_normalisation[file_label]
    if contributes:
        # Same operation order as before: ((cv * spline) * data POT) / sample POT.
        net_weight_vals.append(w_cv * w_spline * data_pot / sample_pot)
    else:
        net_weight_vals.append(0)

all_df["net_weight"] = net_weight_vals


In [None]:
# Same POT normalisation as for all_df, applied to the table that carries the
# T_weight columns. That table holds overlay and dirt events only.
# file label -> (data POT of that run, POT of the sample, whether it contributes)
vars_net_weight_normalisation = {
    "nu_overlay_run1": (data_pots[0], nu_overlay_run1_pot, True),
    "nu_overlay_run2": (data_pots[1], nu_overlay_run2_pot, True),
    "nu_overlay_run3": (data_pots[2], nu_overlay_run3_pot, True),
    "dirt_run1": (data_pots[0], dirt_run1_pot, include_dirt),
    "dirt_run2": (data_pots[1], dirt_run2_pot, include_dirt),
    "dirt_run3": (data_pots[2], dirt_run3_pot, include_dirt),
}
# NOTE(review): the divisors are the POTs of the checkout files
# (nu_overlay_runN_pot / dirt_runN_pot), not the *_vars_pot values computed
# from the UBGenieFluxSmallUni files - confirm this is intended.

weight_cv_vals = all_vars_df["weight_cv"].to_numpy()
weight_spline_vals = all_vars_df["weight_spline"].to_numpy()
files = all_vars_df["file"].to_numpy()
net_weight_vals = []
for w_cv, w_spline, file_label in zip(weight_cv_vals, weight_spline_vals, files):
    # Any cv weight outside (0, 30) - including NaN - is replaced by 1.
    if not (0 < w_cv < 30):
        w_cv = 1

    if file_label not in vars_net_weight_normalisation:
        # The old if/elif ladder appended nothing for an unknown label, which
        # only surfaced as a length mismatch at the column assignment below.
        raise ValueError(f"unrecognised file label {file_label!r} in all_vars_df")

    data_pot, sample_pot, contributes = vars_net_weight_normalisation[file_label]
    if contributes:
        # Same operation order as before: ((cv * spline) * data POT) / sample POT.
        net_weight_vals.append(w_cv * w_spline * data_pot / sample_pot)
    else:
        net_weight_vals.append(0)

all_vars_df["net_weight"] = net_weight_vals


In [None]:
# Truth muon quantities for both event tables.
# truth_muonMomentum_3 is a bracket-free copy of the "truth_muonMomentum[3]"
# column; the signal definition refers to it by that name.
# NOTE(review): the *1000 and the 105.66 constant suggest energy in GeV and a
# muon mass in MeV - confirm units.
for truth_df in (all_df, all_vars_df):
    truth_df["truth_muonMomentum_3"] = truth_df["truth_muonMomentum[3]"].to_numpy()
    truth_df["true_muon_KE"] = truth_df["truth_muonMomentum_3"].to_numpy()*1000.-105.66
    true_ke = truth_df["true_muon_KE"]
    # |p| = sqrt(KE^2 + 2*KE*m)
    truth_df["true_muon_momentum"] = np.sqrt(true_ke**2 + 2*true_ke*105.66)


In [None]:
# Reco-level selection, applied identically to both event tables.
reco_selection_query = "numu_cc_flag >= 0 and numu_score > 0.9 and nue_score < 7 and reco_muon_momentum>0"
selected_df = all_df.query(reco_selection_query)
selected_vars_df = all_vars_df.query(reco_selection_query)

# Truth-level signal definition; every condition must hold.
sig_query = " and ".join([
    "match_completeness_energy>0.1*truth_energyInside",
    "truth_nuPdg==14",
    "truth_isCC==1",
    "truth_vtxInside==1",
    "truth_muonMomentum_3>0",
    "truth_nuEnergy<=4000",
    "truth_nuEnergy > 200",
    "true_muon_momentum > 0",
    "true_muon_momentum <= 2500",
])

sig_sel_df = selected_df.query(sig_query)
# Everything selected that fails the signal definition. Rows whose truth
# columns are NaN fail every comparison and therefore land here.
bkg_sel_df = selected_df.query(f"not ({sig_query})")

# Split the background by origin: EXT files versus everything else.
is_ext_row = bkg_sel_df["file"].isin(["ext_run1", "ext_run2", "ext_run3"])
mc_bkg_sel_df = bkg_sel_df[~is_ext_row]
ext_sel_df = bkg_sel_df[is_ext_row]

print(sig_sel_df.shape)
print(bkg_sel_df.shape)


In [None]:
# Inspect the source file label and POT-normalised weight of each selected signal event.
sig_sel_df[["file", "net_weight"]]

In [None]:
# Sum of net_weight over all selected signal events.
np.sum(sig_sel_df["net_weight"].to_numpy())

In [None]:
"""plt.figure()
plt.hist(all_df["file"].to_numpy(), bins=100)
plt.xticks(rotation=90)
plt.yscale("log")
plt.show()

plt.figure()
plt.hist(selected_df["file"].to_numpy(), bins=100)
plt.xticks(rotation=90)
plt.yscale("log")
plt.show()
"""

In [None]:
# Flattened reco histogram of all selected events:
# 2 containment classes x 4 reco-Enu slices x 9 cos(theta) slices, each
# contributing 16 muon-momentum bins, concatenated in that nesting order.
containment_cuts = (("FC", "match_isFC==1"), ("PC", "match_isFC==0"))

# NOTE(review): only the first slice includes its upper edge; a value of
# exactly 1050, 1570 or 4000 falls into no slice - confirm this is intended.
Enu_cuts = (
    "200 < kine_reco_Enu <= 705",
    "705 < kine_reco_Enu < 1050",
    "1050 < kine_reco_Enu < 1570",
    "1570 < kine_reco_Enu < 4000",
)

costheta_cuts = (
    "-1 < reco_costheta <= -0.5",
    "-0.5 < reco_costheta <= 0.",
    "0. < reco_costheta <= 0.27",
    "0.27 < reco_costheta <= 0.45",
    "0.45 < reco_costheta <= 0.62",
    "0.62 < reco_costheta <= 0.76",
    "0.76 < reco_costheta <= 0.86",
    "0.86 < reco_costheta <= 0.94",
    "0.94 < reco_costheta <= 1.",
)

# fifteen bins from 0 to 1500 plus an overflow
momentum_bin_edges = [100 * k for k in range(16)] + [1e9]

reco_hist = []
for containment, containment_cut in containment_cuts:
    containment_df = selected_df.query(containment_cut)
    for Enu_cut in Enu_cuts:
        Enu_df = containment_df.query(Enu_cut)
        for costheta_cut in costheta_cuts:
            theta_df = Enu_df.query(costheta_cut)
            bin_contents = np.histogram(
                theta_df["reco_muon_momentum"].to_numpy(),
                weights=theta_df["net_weight"].to_numpy(),
                bins=momentum_bin_edges,
            )[0]
            reco_hist.extend(bin_contents)


In [None]:
# Flattened reco histogram of the selected signal events:
# 2 containment classes x 4 reco-Enu slices x 9 cos(theta) slices, each
# contributing 16 muon-momentum bins, concatenated in that nesting order.
containment_cuts = (("FC", "match_isFC==1"), ("PC", "match_isFC==0"))

# NOTE(review): only the first slice includes its upper edge; a value of
# exactly 1050, 1570 or 4000 falls into no slice - confirm this is intended.
Enu_cuts = (
    "200 < kine_reco_Enu <= 705",
    "705 < kine_reco_Enu < 1050",
    "1050 < kine_reco_Enu < 1570",
    "1570 < kine_reco_Enu < 4000",
)

costheta_cuts = (
    "-1 < reco_costheta <= -0.5",
    "-0.5 < reco_costheta <= 0.",
    "0. < reco_costheta <= 0.27",
    "0.27 < reco_costheta <= 0.45",
    "0.45 < reco_costheta <= 0.62",
    "0.62 < reco_costheta <= 0.76",
    "0.76 < reco_costheta <= 0.86",
    "0.86 < reco_costheta <= 0.94",
    "0.94 < reco_costheta <= 1.",
)

# fifteen bins from 0 to 1500 plus an overflow
momentum_bin_edges = [100 * k for k in range(16)] + [1e9]

sig_root_pred = []
for containment, containment_cut in containment_cuts:
    containment_df = sig_sel_df.query(containment_cut)
    for Enu_cut in Enu_cuts:
        Enu_df = containment_df.query(Enu_cut)
        for costheta_cut in costheta_cuts:
            theta_df = Enu_df.query(costheta_cut)
            bin_contents = np.histogram(
                theta_df["reco_muon_momentum"].to_numpy(),
                weights=theta_df["net_weight"].to_numpy(),
                bins=momentum_bin_edges,
            )[0]
            sig_root_pred.extend(bin_contents)

print(len(sig_root_pred))


In [None]:
# Flattened reco histogram of the selected non-EXT background events:
# 2 containment classes x 4 reco-Enu slices x 9 cos(theta) slices, each
# contributing 16 muon-momentum bins, concatenated in that nesting order.
containment_cuts = (("FC", "match_isFC==1"), ("PC", "match_isFC==0"))

# NOTE(review): only the first slice includes its upper edge; a value of
# exactly 1050, 1570 or 4000 falls into no slice - confirm this is intended.
Enu_cuts = (
    "200 < kine_reco_Enu <= 705",
    "705 < kine_reco_Enu < 1050",
    "1050 < kine_reco_Enu < 1570",
    "1570 < kine_reco_Enu < 4000",
)

costheta_cuts = (
    "-1 < reco_costheta <= -0.5",
    "-0.5 < reco_costheta <= 0.",
    "0. < reco_costheta <= 0.27",
    "0.27 < reco_costheta <= 0.45",
    "0.45 < reco_costheta <= 0.62",
    "0.62 < reco_costheta <= 0.76",
    "0.76 < reco_costheta <= 0.86",
    "0.86 < reco_costheta <= 0.94",
    "0.94 < reco_costheta <= 1.",
)

# fifteen bins from 0 to 1500 plus an overflow
momentum_bin_edges = [100 * k for k in range(16)] + [1e9]

mc_bkg_root_pred = []
for containment, containment_cut in containment_cuts:
    containment_df = mc_bkg_sel_df.query(containment_cut)
    for Enu_cut in Enu_cuts:
        Enu_df = containment_df.query(Enu_cut)
        for costheta_cut in costheta_cuts:
            theta_df = Enu_df.query(costheta_cut)
            bin_contents = np.histogram(
                theta_df["reco_muon_momentum"].to_numpy(),
                weights=theta_df["net_weight"].to_numpy(),
                bins=momentum_bin_edges,
            )[0]
            mc_bkg_root_pred.extend(bin_contents)


In [None]:
# Flattened reco histogram of the selected EXT background events:
# 2 containment classes x 4 reco-Enu slices x 9 cos(theta) slices, each
# contributing 16 muon-momentum bins, concatenated in that nesting order.
containment_cuts = (("FC", "match_isFC==1"), ("PC", "match_isFC==0"))

# NOTE(review): only the first slice includes its upper edge; a value of
# exactly 1050, 1570 or 4000 falls into no slice - confirm this is intended.
Enu_cuts = (
    "200 < kine_reco_Enu <= 705",
    "705 < kine_reco_Enu < 1050",
    "1050 < kine_reco_Enu < 1570",
    "1570 < kine_reco_Enu < 4000",
)

costheta_cuts = (
    "-1 < reco_costheta <= -0.5",
    "-0.5 < reco_costheta <= 0.",
    "0. < reco_costheta <= 0.27",
    "0.27 < reco_costheta <= 0.45",
    "0.45 < reco_costheta <= 0.62",
    "0.62 < reco_costheta <= 0.76",
    "0.76 < reco_costheta <= 0.86",
    "0.86 < reco_costheta <= 0.94",
    "0.94 < reco_costheta <= 1.",
)

# fifteen bins from 0 to 1500 plus an overflow
momentum_bin_edges = [100 * k for k in range(16)] + [1e9]

ext_root_pred = []
for containment, containment_cut in containment_cuts:
    containment_df = ext_sel_df.query(containment_cut)
    for Enu_cut in Enu_cuts:
        Enu_df = containment_df.query(Enu_cut)
        for costheta_cut in costheta_cuts:
            theta_df = Enu_df.query(costheta_cut)
            bin_contents = np.histogram(
                theta_df["reco_muon_momentum"].to_numpy(),
                weights=theta_df["net_weight"].to_numpy(),
                bins=momentum_bin_edges,
            )[0]
            ext_root_pred.extend(bin_contents)


In [None]:
# Flattened predictions and data read from the merged histogram file.
# For each of the 72 channels, `.values(flow=True)[1:]` is taken, i.e. the
# flow-inclusive bin contents with the first entry dropped.
mc_sig_pred = []
mc_bkg_pred = []
ext_pred = []
tot_pred = []
data = []
tot_pred_from_hmc = []

n_channels = 72

# histo_1..72 are read as signal, histo_73..144 as background and, when
# present, histo_145..216 as EXT.
# seems like the genie v2 root files don't contain EXT blocks, should be fine since fake data never includes EXT
has_ext_block = not use_genie_v2_fake_data

for channel in range(1, n_channels + 1):
    sig_counts = f_merged[f"histo_{channel}"].values(flow=True)[1:]
    bkg_counts = f_merged[f"histo_{channel + n_channels}"].values(flow=True)[1:]

    mc_sig_pred += list(sig_counts)
    mc_bkg_pred += list(bkg_counts)

    if has_ext_block:
        ext_counts = f_merged[f"histo_{channel + 2 * n_channels}"].values(flow=True)[1:]
        ext_pred += list(ext_counts)
        tot_pred += list(sig_counts + bkg_counts + ext_counts)
    else:
        # Zero EXT prediction, sized from this channel's own signal histogram.
        # The previous code took the length from histo_{i+2}, the neighbouring
        # channel, which only matched because all channels share one binning.
        ext_pred += [0] * len(sig_counts)
        tot_pred += list(sig_counts + bkg_counts)

    tot_pred_from_hmc += list(f_merged[f"hmc_obsch_{channel}"].values(flow=True)[1:])

    data += list(f_merged[f"hdata_obsch_{channel}"].values(flow=True)[1:])




In [None]:
plt.rcParams.update({'font.size': 14})

# Signal prediction rebuilt from the WC ntuples overlaid on the one stored in merge.root
fig, ax = plt.subplots(figsize=(10, 6))
for series, series_label in [(sig_root_pred, "sig_root_pred, from WC ntuples"),
                             (mc_sig_pred, "mc_sig_pred, from merge.root")]:
    ax.plot(series, label=series_label)
ax.legend()
plt.show()

# Bin-by-bin difference between the two
sig_pred_difference = np.array(sig_root_pred) - np.array(mc_sig_pred)
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sig_pred_difference, label="sig_root_pred - mc_sig_pred")
ax.legend()
plt.show()



In [None]:
plt.rcParams.update({'font.size': 14})

# MC background prediction rebuilt from the WC ntuples overlaid on the one stored in merge.root
fig, ax = plt.subplots(figsize=(10, 6))
for series, series_label in [(mc_bkg_root_pred, "bkg_root_pred, from WC ntuples"),
                             (mc_bkg_pred, "mc_bkg_pred, from merge.root")]:
    ax.plot(series, label=series_label)
ax.legend()
plt.show()

# Bin-by-bin difference between the two
bkg_pred_difference = np.array(mc_bkg_root_pred) - np.array(mc_bkg_pred)
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(bkg_pred_difference, label="mc_bkg_root_pred - mc_bkg_pred")
ax.legend()
plt.show()


In [None]:
plt.rcParams.update({'font.size': 14})

# EXT prediction rebuilt from the WC ntuples overlaid on the one stored in merge.root
fig, ax = plt.subplots(figsize=(10, 6))
for series, series_label in [(ext_root_pred, "ext_root_pred, from WC ntuples"),
                             (ext_pred, "ext_pred, from merge.root")]:
    ax.plot(series, label=series_label)
ax.legend()
plt.show()

# Bin-by-bin difference between the two
ext_pred_difference = np.array(ext_root_pred) - np.array(ext_pred)
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ext_pred_difference, label="ext_root_pred - ext_pred")
ax.legend()
plt.show()


In [None]:
# Total prediction rebuilt from the WC ntuples overlaid on the one stored in merge.root
plt.figure(figsize=(10, 6))
plt.plot(reco_hist, label="reco_hist, from WC ntuples")
plt.plot(tot_pred, label="tot_pred, from merge.root")
plt.legend()
plt.show()

# Bin-by-bin difference. reco_hist comes from the WC ntuples and tot_pred from merge.root,
# so the plotted quantity is (WC ntuples - merge.root); the label previously had this reversed.
plt.figure(figsize=(10, 6))
plt.plot(np.array(reco_hist) - np.array(tot_pred), label="reco_hist - tot_pred (WC ntuples - merge.root)")
plt.legend()
plt.show()



In [None]:
# Cross-check of the merge.root prediction against the mean vector / covariance stored in cov_17.root
cov_17_from_london = uproot.open("numuCC_3d_data/nuwro_fake_data/XsFlux/cov_17.root")

num_reco_bins = 1152
num_truth_bins = 3456

# Flattened elements of the collapse matrix stored in the merged file
mat_collapse_elements = f_merged["mat_collapse"].member("fElements")
if not use_genie_v2_fake_data:
    mat_collapse = mat_collapse_elements.reshape((num_truth_bins, num_reco_bins))
else:
    # I think EXT was not included in merge_xs.root, so here we need to add more zeros ourselves
    mat_collapse = np.zeros((num_truth_bins, num_reco_bins))
    mat_collapse[:2304, :] = mat_collapse_elements.reshape((2304, num_reco_bins))

vec_mean_17 = cov_17_from_london["vec_mean_17"].member("fElements")
if use_genie_v2_fake_data:
    # EXT is included in vec_mean_17, even though it's not in the covariance matrix?
    vec_mean_17 = vec_mean_17[:num_truth_bins]
cov_17_vec_mean_from_london = mat_collapse.T @ vec_mean_17

cov_xf_mat_17 = cov_17_from_london["cov_xf_mat_17"].member("fElements").reshape((num_truth_bins, num_truth_bins))
cov_17_arr_from_london = mat_collapse.T @ cov_xf_mat_17 @ mat_collapse

# Difference and fractional difference of the collapsed mean vector w.r.t. tot_pred
london_minus_tot_pred = np.array(cov_17_vec_mean_from_london) - np.array(tot_pred)
london_frac_diff = london_minus_tot_pred / np.array(tot_pred)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(reco_hist, label="reco_hist, from WC ntuples")
ax.plot(tot_pred, label="tot_pred, from merge.root")
ax.plot(cov_17_vec_mean_from_london, label="cov_17_vec_mean_from_london")
ax.legend()
plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(london_minus_tot_pred, label="cov_17_vec_mean_from_london - tot_pred")
ax.legend()
plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(london_frac_diff, label="(cov_17_vec_mean_from_london - tot_pred) / tot_pred")
ax.legend()
ax.set_ylim(-0.01, 0.5)
plt.show()

# Zoom on the first 30 bins
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cov_17_vec_mean_from_london[:30], label="cov_17_vec_mean_from_london")
ax.plot(tot_pred[:30], label="tot_pred")
ax.legend()
plt.show()

print(cov_17_vec_mean_from_london[:30])
print(tot_pred[:30])

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(london_frac_diff, label="(cov_17_vec_mean_from_london - tot_pred) / tot_pred")
ax.legend()
ax.set_ylim(-0.01, 0.1)
ax.set_xlim(0, 30)
plt.show()


In [None]:
# Scratch arithmetic check: 11943936 == 3456**2, so this evaluates to 3456, the value used
# for num_truth_bins. Presumably 11943936 is the flattened length of cov_xf_mat_17 -- TODO confirm.
np.sqrt(11943936)

In [None]:
# Build (or reload from a pickle cache) the reco-space histograms for the CV prediction,
# the 600 All_UBGenie multisim universes, and every unisim knob variation.
#
# NOTE: the xsr_scc_* fix below changes the regenerated unisim histograms. Cached
# universes_v6_*.pkl files written before the fix keep the old values until this cell
# is rerun with regenerate_universes = True.

# Cache file for the selected dataset
if use_real_data:
    universes_pickle_path = "universes_v6_real.pkl"
elif use_nuwro_fake_data:
    universes_pickle_path = "universes_v6_nuwro.pkl"
elif use_genie_v2_fake_data:
    universes_pickle_path = "universes_v6_genie_v2.pkl"
else:
    universes_pickle_path = None  # no dataset flag set: nothing is written or read (unchanged behaviour)

if regenerate_universes:

    unisim_knob_names = ["AxFFCCQEshape_UBGenie",
                         "DecayAngMEC_UBGenie",
                         "NormCCCOH_UBGenie",
                         "NormNCCOH_UBGenie",
                         "RPA_CCQE_UBGenie",
                         "ThetaDelta2NRad_UBGenie",
                         "Theta_Delta2Npi_UBGenie",
                         "VecFFCCQEshape_UBGenie",
                         "XSecShape_CCMEC_UBGenie",
                         "xsr_scc_Fa3_SCC",
                         "xsr_scc_Fv3_SCC"]

    # Knobs whose stored weights are NOT divided by weight_cv before use
    unisim_knobs_not_divided_by_cv = ("xsr_scc_Fa3_SCC", "xsr_scc_Fv3_SCC")

    xs_cv_reco_hist = []
    universe_reco_hists = [[] for _ in range(600)]
    unisim_reco_hist_dic = {
        unisim_type: [[] for _ in range(num_unisim_variations_dic[unisim_type])]
        for unisim_type in unisim_knob_names
    }

    muon_momentum_bins = [i*100 for i in range(16)] + [1e9] # fifteen bins from 0 to 1500 plus an overflow

    # Slice selections, applied in nesting order: containment (FC then PC), reco Enu, reco cos(theta)
    containment_queries = ["match_isFC==1", "match_isFC==0"]
    enu_queries = ["200 < kine_reco_Enu <= 705",
                   "705 < kine_reco_Enu < 1050",
                   "1050 < kine_reco_Enu < 1570",
                   "1570 < kine_reco_Enu < 4000"]
    costheta_queries = ["-1 < reco_costheta <= -0.5",
                        "-0.5 < reco_costheta <= 0.",
                        "0. < reco_costheta <= 0.27",
                        "0.27 < reco_costheta <= 0.45",
                        "0.45 < reco_costheta <= 0.62",
                        "0.62 < reco_costheta <= 0.76",
                        "0.76 < reco_costheta <= 0.86",
                        "0.86 < reco_costheta <= 0.94",
                        "0.94 < reco_costheta <= 1."]

    pbar = tqdm(total=len(containment_queries) * len(enu_queries) * len(costheta_queries))

    for containment_query in containment_queries:
        containment_df = selected_vars_df.query(containment_query)
        for enu_query in enu_queries:
            Enu_df = containment_df.query(enu_query)
            for costheta_query in costheta_queries:
                theta_df = Enu_df.query(costheta_query)

                pbar.update(1)

                # Columns reused by every universe / variation of this slice
                slice_momentum = theta_df["reco_muon_momentum"].to_numpy()
                slice_net_weight = theta_df["net_weight"].to_numpy()
                slice_weight_cv = theta_df["weight_cv"].to_numpy()

                curr_slice_cv = np.histogram(slice_momentum, weights=slice_net_weight, bins=muon_momentum_bins)[0]
                xs_cv_reco_hist += list(curr_slice_cv)

                for i in range(600):
                    curr_All_UBGenie_weights = [_[i] for _ in theta_df["All_UBGenie"].to_numpy()]
                    rel_weight_diffs = curr_All_UBGenie_weights / slice_weight_cv
                    # https://github.com/BNLIF/wcp-uboone-bdt/blob/main/src/mcm_2.h#L262-L264
                    rel_weight_diffs = np.where(np.abs(rel_weight_diffs) > 100, 1, rel_weight_diffs)
                    rel_weight_diffs = np.nan_to_num(rel_weight_diffs, nan=0)
                    curr_slice_uni = np.histogram(slice_momentum, weights=slice_net_weight*rel_weight_diffs, bins=muon_momentum_bins)[0]
                    universe_reco_hists[i] += list(curr_slice_uni)

                for unisim_type in unisim_knob_names:

                    num_unisim_variations = num_unisim_variations_dic[unisim_type]
                    for j in range(num_unisim_variations):

                        curr_weights = [_[j] for _ in theta_df[unisim_type].to_numpy()]
                        if unisim_type in unisim_knobs_not_divided_by_cv:
                            # Bug fix: this assignment used to be missing, so the xsr_scc_* knobs
                            # silently reused rel_weight_diffs left over from the previous variation.
                            # The stored weights are used as-is (assumed already relative to the
                            # CV -- TODO confirm against the ntuple producer).
                            rel_weight_diffs = np.asarray(curr_weights, dtype=float)
                        else:
                            rel_weight_diffs = curr_weights / slice_weight_cv

                        # https://github.com/BNLIF/wcp-uboone-bdt/blob/main/src/mcm_2.h#L262-L264
                        rel_weight_diffs = np.where(np.abs(rel_weight_diffs) > 100, 1, rel_weight_diffs)
                        rel_weight_diffs = np.nan_to_num(rel_weight_diffs, nan=0)

                        curr_unisim_slice = np.histogram(slice_momentum, weights=slice_net_weight*rel_weight_diffs, bins=muon_momentum_bins)[0]
                        unisim_reco_hist_dic[unisim_type][j] += list(curr_unisim_slice)

    pbar.close()

    if universes_pickle_path is not None:
        # context manager so the cache file is flushed and closed
        with open(universes_pickle_path, "wb") as universes_pickle_file:
            pickle.dump((xs_cv_reco_hist, universe_reco_hists, unisim_reco_hist_dic), universes_pickle_file)
else:
    if universes_pickle_path is not None:
        # only load cache files written by this notebook: unpickling can execute arbitrary code
        with open(universes_pickle_path, "rb") as universes_pickle_file:
            xs_cv_reco_hist, universe_reco_hists, unisim_reco_hist_dic = pickle.load(universes_pickle_file)

uncollapsed_dim = len(universe_reco_hists[0])


In [None]:
def _fold_bins_modulo(values, folded_length):
    """Sum the first 1152 entries of `values` into `folded_length` bins; entry i is added to bin i % folded_length."""
    folded = np.zeros(folded_length)
    for flat_index in range(1152):
        folded[flat_index % folded_length] += values[flat_index]
    return folded


if collapse_2d:
    # collapsing everything to muon momentum and muon angle
    # Combining FC/PC and Enu bins
    folded_length = 16*9
elif collapse_1d:
    # collapsing everything to muon momentum
    # Combining FC/PC, Enu, and theta bins
    folded_length = 16
else:
    folded_length = None  # keep the full binning

if folded_length is not None:

    collapsed_reco_hist = _fold_bins_modulo(reco_hist, folded_length)
    collapsed_tot_pred = _fold_bins_modulo(tot_pred, folded_length)
    collapsed_data = _fold_bins_modulo(data, folded_length)

    collapsed_universe_reco_hists = [
        _fold_bins_modulo(universe_reco_hists[uni_i], folded_length) for uni_i in range(600)
    ]

    collapsed_unisim_reco_hist_dic = {
        knob: [_fold_bins_modulo(variation_hist, folded_length) for variation_hist in variation_hists]
        for knob, variation_hists in unisim_reco_hist_dic.items()
    }

    # From here on the collapsed versions replace the full-binning ones
    reco_hist = collapsed_reco_hist
    tot_pred = collapsed_tot_pred
    data = collapsed_data

    universe_reco_hists = collapsed_universe_reco_hists
    unisim_reco_hist_dic = collapsed_unisim_reco_hist_dic


collapsed_dim = len(universe_reco_hists[0])
collapsed_plus_dim = collapsed_dim + 3



In [None]:
# Per-universe knob values read from text files
MA_values = np.genfromtxt("knob_values/MaCCQE_univs.txt")
MEC_values = np.genfromtxt("knob_values/NormCCMEC_univs_v2.txt")

# lambda: total event count of each universe relative to the total of reco_hist
reco_hist_total = np.sum(reco_hist)
lambda_values = [np.sum(universe_reco_hists[i]) / reco_hist_total for i in range(600)]

# mean and spread of each of the three quantities
for knob_values in (MA_values, MEC_values, lambda_values):
    print(np.mean(knob_values), np.std(knob_values))

In [None]:
# Prediction vector extended with the prior values of the three constrained quantities
# (M_A, NormCCMEC, lambda), in that order.
tot_pred_MA = list(tot_pred) + [1.10, 1.66, 1]


# One extended vector per multisim universe: its reco histogram followed by that universe's
# own (M_A, NormCCMEC, lambda) values.
universe_reco_MAs = []
if shape_type == "rate+shape" or shape_type == "+100":
    for i in range(600):
        universe_reco_MAs.append(np.array(list(universe_reco_hists[i]) + [MA_values[i], MEC_values[i], lambda_values[i]]))
elif shape_type == "matrix_breakdown":
    for uni_i in range(600):
        # Bug fix: NormCCMEC was indexed with a stale `i` left over from an earlier loop instead of
        # this loop's `uni_i`, so universes were paired with the wrong NormCCMEC value.
        universe_reco_MAs.append(np.array(list(universe_reco_hists[uni_i] / np.sum(universe_reco_hists[uni_i])) + [MA_values[uni_i], MEC_values[uni_i], lambda_values[uni_i]]))
    # keep the un-normalized prediction; the histogram part below is rescaled to unit sum
    not_normed_tot_pred_MA = tot_pred_MA.copy()
    tot_pred_MA = list(tot_pred_MA[:-3] / np.sum(tot_pred_MA[:-3])) + [1.10, 1.66, 1]
    data = data / np.sum(data)


In [None]:
dim = len(tot_pred_MA)

# Multisim covariance: average over the 600 universes of the outer product of
# (universe vector - tot_pred_MA) with itself.
multisim_xs_MA_cov = np.zeros((dim, dim))

for uni_i in tqdm(range(600)):
    uni_reco_MA = universe_reco_MAs[uni_i]
    universe_deviation = uni_reco_MA - tot_pred_MA
    multisim_xs_MA_cov += np.outer(universe_deviation, universe_deviation)

multisim_xs_MA_cov = multisim_xs_MA_cov / 600.


In [None]:
# Divisor applied to each unisim knob's summed outer products when its covariance
# contribution is built (the sum over that knob's variations is divided by this number).
# This fixes the fact that some of the two-length weight arrays contain the CV rather than
# a variation, so in that case we want to divide by one and not two.
# NOTE(review): the reason for 10 on the xsr_scc_* knobs is not visible in this notebook --
# presumably their number of variations; confirm against num_unisim_variations_dic.
unisim_divide_number_dic = {
    "AxFFCCQEshape_UBGenie": 1,
    "DecayAngMEC_UBGenie": 1,
    "NormCCCOH_UBGenie": 1,
    "NormNCCOH_UBGenie": 1,
    "RPA_CCQE_UBGenie": 2,
    "ThetaDelta2NRad_UBGenie": 1,
    "Theta_Delta2Npi_UBGenie": 1,
    "VecFFCCQEshape_UBGenie": 1,
    "XSecShape_CCMEC_UBGenie": 1,
    "xsr_scc_Fa3_SCC": 10,
    "xsr_scc_Fv3_SCC": 10,
}

In [None]:
# adding unisim variations as well

unisim_xs_MA_cov = np.zeros((collapsed_plus_dim, collapsed_plus_dim))

# Knobs to include; AxFFCCQEshape_UBGenie can be dropped with skip_AxFFCCQEshape_UBGenie
unisim_types = ["AxFFCCQEshape_UBGenie",
                "DecayAngMEC_UBGenie",
                "NormCCCOH_UBGenie",
                "NormNCCOH_UBGenie",
                "RPA_CCQE_UBGenie",
                "ThetaDelta2NRad_UBGenie",
                "Theta_Delta2Npi_UBGenie",
                "VecFFCCQEshape_UBGenie",
                "XSecShape_CCMEC_UBGenie",
                "xsr_scc_Fa3_SCC",
                "xsr_scc_Fv3_SCC",]
if skip_AxFFCCQEshape_UBGenie:
    unisim_types.remove("AxFFCCQEshape_UBGenie")

# Progress-bar total counts only the knobs actually processed, so it stays accurate when a knob is skipped
pbar = tqdm(total=sum(num_unisim_variations_dic[unisim_type] for unisim_type in unisim_types))

# Per-knob sqrt(diagonal) of the covariance contribution, kept for the breakdown plots
unisim_diag_errs = {}

for unisim_type in unisim_types:

    curr_unisim_xs_MA_cov = np.zeros((collapsed_plus_dim, collapsed_plus_dim))

    for j in range(num_unisim_variations_dic[unisim_type]):

        pbar.update(1)

        # Deviation of this variation from reco_hist; the three appended zeros mean the unisims
        # contribute nothing to the (M_A, NormCCMEC, lambda) rows and columns.
        diff = np.array(unisim_reco_hist_dic[unisim_type][j]) - np.array(reco_hist)
        diff = np.append(diff, np.array([0, 0, 0]))

        curr_unisim_xs_MA_cov += np.outer(diff, diff)

    #curr_unisim_xs_MA_cov = curr_unisim_xs_MA_cov / num_unisim_variations_dic[unisim_type]
    curr_unisim_xs_MA_cov = curr_unisim_xs_MA_cov / unisim_divide_number_dic[unisim_type]
    unisim_xs_MA_cov += curr_unisim_xs_MA_cov

    unisim_diag_errs[unisim_type] = np.sqrt(np.diag(curr_unisim_xs_MA_cov))


pbar.close()

# Total cross-section covariance = multisim part + unisim part
xs_MA_cov = multisim_xs_MA_cov + unisim_xs_MA_cov

total_unisim_diag_errs = np.sqrt(np.diag(unisim_xs_MA_cov))
total_multi_diag_errs = np.sqrt(np.diag(multisim_xs_MA_cov))

total_xs_diag_errs = np.sqrt(np.diag(xs_MA_cov))


In [None]:
# Fractional uncertainty per bin from each unisim knob, plus their total
fig, ax = plt.subplots(figsize=(10, 6))
for unisim_type in unisim_types:
    ax.plot(unisim_diag_errs[unisim_type][:-3] / tot_pred, label=unisim_type, ls="--")
ax.plot(total_unisim_diag_errs[:-3] / tot_pred, label="total unisim", c="k", lw=2)
ax.set_ylim(0, 0.5)
ax.set_xlim(0, 50)
ax.set_xlabel("Bin Number")
ax.set_ylabel("Fractional Uncertainty")
ax.legend()
plt.show()

# Fractional uncertainty per bin from the multisim universes
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(total_multi_diag_errs[:-3] / tot_pred, label="total multisim", c="k", lw=2)
ax.set_ylim(0, 0.5)
ax.set_xlim(0, 50)
ax.legend()
ax.set_xlabel("Bin Number")
ax.set_ylabel("Fractional Uncertainty")
plt.show()

# Recomputed total XS fractional uncertainty compared with the one from cov_17.root
cov_17_from_london_frac_errs = np.sqrt(np.diag(cov_17_arr_from_london)) / cov_17_vec_mean_from_london
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(total_xs_diag_errs[:-3] / tot_pred, label="total XS")
ax.plot(cov_17_from_london_frac_errs, label="total XS from London's cov_17.root")
ax.set_ylim(0, 0.5)
ax.set_xlim(0, 50)
ax.legend()
ax.set_xlabel("Bin Number")
ax.set_ylabel("Fractional Uncertainty")
plt.show()


In [None]:
# Only meaningful in the full binning, where cov_17.root and the recomputed covariance share bins
if not collapse_2d and not collapse_1d:

    recalc_frac_err = total_xs_diag_errs[:-3] / tot_pred
    london_frac_err = np.sqrt(np.diag(cov_17_arr_from_london)) / cov_17_vec_mean_from_london
    frac_err_ratio_minus_one = recalc_frac_err / london_frac_err - 1

    # Same figure twice: full bin range first, then zoomed to the first 200 bins
    for bin_range in (None, (0, 200)):
        plt.figure(figsize=(10, 6))
        plt.plot(frac_err_ratio_minus_one, label="recalc_xs_frac_err / london_framework_xs_frac_err - 1")
        plt.plot(np.array(tot_pred) / 2500., label="tot_pred, arbitrary norm")
        plt.axhline(0, c="k", ls="--")
        plt.legend()
        plt.xlabel("Bin Number")
        plt.ylim(-2, 2)
        if bin_range is not None:
            plt.xlim(*bin_range)
        plt.show()


In [None]:
# using Pearson data stat uncertainty
# Diagonal matrix whose (i, i) entry is the predicted count tot_pred[i]; built directly with
# np.diag instead of a Python double loop over every (i, j) pair.
pearson_data_stat_cov_matrix = np.diag(np.asarray(tot_pred, dtype=float)[:collapsed_dim])
cov_data_stat_new = pearson_data_stat_cov_matrix


In [None]:
# Covariance blocks read from the wiener file; element [0] of to_numpy() is the array of bin contents.
# hcov_xs and hcov_tot are deliberately not loaded here.
cov_stat, cov_mcstat, cov_add, cov_det, cov_flux = (
    f_wiener[f"hcov_{cov_source}"].to_numpy()[0]
    for cov_source in ("stat", "mcstat", "add", "det", "flux")
)

cov_stat.shape


In [None]:
def _pad_with_trio_block(cov):
    """Return `cov` extended by three zero rows and three zero columns for the (M_A, NormCCMEC, lambda) entries."""
    padded = np.zeros((collapsed_dim + 3, collapsed_dim + 3))
    padded[:collapsed_dim, :collapsed_dim] = cov
    return padded


if collapse_2d:
    covariance_fold_length = 16*9
elif collapse_1d:
    covariance_fold_length = 16
else:
    covariance_fold_length = None  # full binning: covariances are used as loaded

if covariance_fold_length is not None:

    # (1152 x fold_length) matrix with a single 1 per row, in column (row index % fold_length)
    collapsing_matrix = np.tile(np.eye(covariance_fold_length, dtype=int), (1152 // covariance_fold_length, 1))

    # cov_stat is not collapsed: the stat term below is taken from cov_data_stat_new instead
    cov_mcstat = np.linalg.multi_dot([collapsing_matrix.T, cov_mcstat, collapsing_matrix])
    cov_add = np.linalg.multi_dot([collapsing_matrix.T, cov_add, collapsing_matrix])
    cov_det = np.linalg.multi_dot([collapsing_matrix.T, cov_det, collapsing_matrix])
    cov_flux = np.linalg.multi_dot([collapsing_matrix.T, cov_flux, collapsing_matrix])

cov_stat_MA = _pad_with_trio_block(cov_data_stat_new)
cov_mcstat_MA = _pad_with_trio_block(cov_mcstat)
cov_add_MA = _pad_with_trio_block(cov_add)
cov_det_MA = _pad_with_trio_block(cov_det)
cov_flux_MA = _pad_with_trio_block(cov_flux)

# Sum of every covariance source in the extended (histogram + trio) space
total_cov_MA = (xs_MA_cov
        + cov_stat_MA
        + cov_mcstat_MA
        + cov_add_MA
        + cov_det_MA
        + cov_flux_MA
        )


In [None]:
# Shape-type specific modification of the total covariance.
# NOTE: the "+100" branch adds to total_cov_MA in place, so rerunning only this cell adds the
# normalization term again; rerun the cell that builds total_cov_MA first.
n = total_cov_MA.shape[0]

if shape_type == "+100":

    percent_normalization_error = 100.

    # Number of histogram bins, derived from the covariance itself. This used to be hard-coded
    # to 1152, which only matches the un-collapsed binning and failed for collapse_2d / collapse_1d.
    dim = n - 3

    # Fully correlated normalization term: outer product of the prediction with itself,
    # with zero rows/columns for the three trio entries.
    tot_pred_vec = np.asarray(tot_pred)
    extra_normalization_cov = np.zeros((n, n))
    extra_normalization_cov[:dim, :dim] = np.outer(tot_pred_vec, tot_pred_vec)

    total_cov_MA += extra_normalization_cov * percent_normalization_error**2 / (100. * 100.)

elif shape_type == "matrix_breakdown":

    # non xs cov_MA
    M = (cov_stat_MA
       + cov_mcstat_MA
       + cov_add_MA
       + cov_det_MA
       + cov_flux_MA)


    # from docDB 5926
    # M is split into shape (M_s), mixed (M_m) and normalization (M_n) parts, each divided by N_T^2.

    N = np.array(not_normed_tot_pred_MA)
    N_T = np.sum(N)
    row_sums = M.sum(axis=1)
    normalized_N = N / N_T
    M_sum = np.sum(M)

    print("extracting non-XS covariance matrix normalization component...")

    # Vectorized form of the element-wise formulas (replaces a Python double loop over all n*n entries):
    #   rows_times_N[i][j] = row_sums[i] * normalized_N[j]
    #   N_times_rows[i][j] = normalized_N[i] * row_sums[j]
    #   N_times_N[i][j]    = normalized_N[i] * normalized_N[j]
    rows_times_N = np.outer(row_sums, normalized_N)
    N_times_rows = np.outer(normalized_N, row_sums)
    N_times_N = np.outer(normalized_N, normalized_N)

    M_s = (M
         - rows_times_N
         - N_times_rows
         + N_times_N * M_sum
        ) / (N_T * N_T)

    M_m = (rows_times_N
         + N_times_rows
         - 2. * N_times_N * M_sum
        ) / (N_T * N_T)

    M_n = N_times_N * M_sum / (N_T * N_T)


    # only the shape part of the non-XS covariance is kept, plus the full XS covariance
    total_cov_MA = M_s + xs_MA_cov


In [None]:
# Prior central values of (M_A, NormCCMEC, lambda)
trio_prior = [1.1, 1.66, 1.]

# Partition the total covariance: the last three rows/columns belong to the trio,
# everything before them to the histogram bins used as the constraint.
num_constraining_bins = total_cov_MA.shape[0] - 3
cov_constraining = total_cov_MA[:num_constraining_bins, :num_constraining_bins]
cov_cross = total_cov_MA[num_constraining_bins:, :num_constraining_bins]
cov_prior = total_cov_MA[num_constraining_bins:, num_constraining_bins:]
inv_cov_constraining = np.linalg.inv(cov_constraining)


In [None]:
# Prior value and uncertainty (sqrt of the diagonal of cov_prior) for each trio member
for trio_index, prior_label in enumerate(("prior M_A: ", "prior NormCCMEC: ", "prior lambda: ")):
    print(prior_label, trio_prior[trio_index], "+/-", np.sqrt(cov_prior[trio_index][trio_index]))


# Generating Real Or Fake Data Point

In [None]:
# Conditional-Gaussian update of the trio given the observed histogram:
#   mean = prior + cov_cross . inv(cov_constraining) . (data - prediction)
#   cov  = cov_prior - cov_cross . inv(cov_constraining) . cov_cross^T
data_residual = np.array(data) - np.array(tot_pred_MA[:-3])
data_constrained_shift = np.linalg.multi_dot([cov_cross, inv_cov_constraining, data_residual])
data_constrained_trio = tot_pred_MA[-3:] + data_constrained_shift

data_constrained_reduction = np.linalg.multi_dot([cov_cross, inv_cov_constraining, np.transpose(cov_cross)])
data_constrained_trio_cov = total_cov_MA[-3:,-3:] - data_constrained_reduction


for trio_index, constrained_label in enumerate(("constrained M_A: ", "constrained NormCCMEC: ", "constrained lambda: ")):
    print(constrained_label, data_constrained_trio[trio_index], "+/-", np.sqrt(data_constrained_trio_cov[trio_index][trio_index]))


# Generating Asimov Point

In [None]:
# Asimov point: the "data" is the prediction itself, so the residual entering the update is zero
fake_data = np.array(tot_pred_MA[:-3])

asimov_residual = fake_data - np.array(tot_pred_MA[:-3])
asimov_shift = np.linalg.multi_dot([cov_cross, inv_cov_constraining, asimov_residual])
asimov_constrained_trio = tot_pred_MA[-3:] + asimov_shift

asimov_reduction = np.linalg.multi_dot([cov_cross, inv_cov_constraining, np.transpose(cov_cross)])
asimov_constrained_trio_cov = total_cov_MA[-3:,-3:] - asimov_reduction

for trio_index, constrained_label in enumerate(("constrained M_A: ", "constrained NormCCMEC: ", "constrained lambda: ")):
    print(constrained_label, asimov_constrained_trio[trio_index], "+/-", np.sqrt(asimov_constrained_trio_cov[trio_index][trio_index]))


# Generating Fake Data Samples From Cov Matrix

In [None]:
# 600 fake (histogram + trio) vectors drawn from a Gaussian with the total covariance.
# NOTE: the draws use NumPy's global random state and no seed is set in this cell,
# so the samples differ from run to run.
fake_datas_reco_MAs = np.random.multivariate_normal(tot_pred_MA, total_cov_MA, size=600)

cov_mat_trios = []
cov_mat_trio_covs = []
cov_mat_true_trios = []

# The constrained covariance does not depend on the drawn sample, so it is computed once
# here instead of once per sample.
cov_mat_constrained_trio_cov = total_cov_MA[-3:,-3:] - np.linalg.multi_dot(
    [cov_cross, inv_cov_constraining, np.transpose(cov_cross)]
)
cov_mat_cv_reco = np.array(tot_pred_MA[:-3])

for uni_i in tqdm(range(600)):

    # CV with cov matrix variations
    fake_data = fake_datas_reco_MAs[uni_i][:-3]

    fake_constrained_trio = tot_pred_MA[-3:] + np.linalg.multi_dot(
        [cov_cross, inv_cov_constraining, fake_data - cov_mat_cv_reco]
    )
    # independent copy per sample, as before
    fake_constrained_trio_cov = cov_mat_constrained_trio_cov.copy()

    cov_mat_trios.append(fake_constrained_trio)
    cov_mat_trio_covs.append(fake_constrained_trio_cov)
    # the "true" trio of a sample is the last three entries of the drawn vector
    cov_mat_true_trios.append(fake_datas_reco_MAs[uni_i][-3:])



# Generating Fake Data Samples From XS Cov Matrix

In [None]:
# 600 fake (histogram + trio) vectors drawn using only the cross-section covariance; the
# constraint itself still uses the blocks of the total covariance.
# NOTE: the draws use NumPy's global random state and no seed is set in this cell,
# so the samples differ from run to run.
fake_datas_reco_MAs = np.random.multivariate_normal(tot_pred_MA, xs_MA_cov, size=600)

xs_cov_mat_trios = []
xs_cov_mat_trio_covs = []
xs_cov_mat_true_trios = []

# The constrained covariance does not depend on the drawn sample, so it is computed once
# here instead of once per sample.
xs_cov_mat_constrained_trio_cov = total_cov_MA[-3:,-3:] - np.linalg.multi_dot(
    [cov_cross, inv_cov_constraining, np.transpose(cov_cross)]
)
xs_cov_mat_cv_reco = np.array(tot_pred_MA[:-3])

for uni_i in tqdm(range(600)):

    # CV with cov matrix variations
    fake_data = fake_datas_reco_MAs[uni_i][:-3]

    fake_constrained_trio = tot_pred_MA[-3:] + np.linalg.multi_dot(
        [cov_cross, inv_cov_constraining, fake_data - xs_cov_mat_cv_reco]
    )
    # independent copy per sample, as before
    fake_constrained_trio_cov = xs_cov_mat_constrained_trio_cov.copy()

    xs_cov_mat_trios.append(fake_constrained_trio)
    xs_cov_mat_trio_covs.append(fake_constrained_trio_cov)
    # the "true" trio of a sample is the last three entries of the drawn vector
    xs_cov_mat_true_trios.append(fake_datas_reco_MAs[uni_i][-3:])


# Generating Fake Data Samples From GENIE Universes

In [None]:
# Constrain the trio using each of the 600 GENIE multisim universes as fake data
genie_trios = []
genie_trio_covs = []
genie_true_trios = []

# The constrained covariance does not depend on the universe, so it is computed once
# here instead of once per universe.
genie_constrained_trio_cov = total_cov_MA[-3:,-3:] - np.linalg.multi_dot(
    [cov_cross, inv_cov_constraining, np.transpose(cov_cross)]
)
genie_cv_reco = np.array(tot_pred_MA[:-3])

for uni_i in tqdm(range(600)):

    # CV with GENIE variations
    fake_data = universe_reco_MAs[uni_i][:-3]

    fake_constrained_trio = tot_pred_MA[-3:] + np.linalg.multi_dot(
        [cov_cross, inv_cov_constraining, fake_data - genie_cv_reco]
    )
    # independent copy per universe, as before
    fake_constrained_trio_cov = genie_constrained_trio_cov.copy()

    genie_trios.append(fake_constrained_trio)
    genie_trio_covs.append(fake_constrained_trio_cov)
    # the "true" trio of a universe is the last three entries of its extended vector
    genie_true_trios.append(universe_reco_MAs[uni_i][-3:])


# Saving To Pickle

In [None]:
# Output file name: trio_pickles/<dataset>_<shape_type>_<binning>.pkl
if use_real_data:
    dataset_tag = "real"
elif use_nuwro_fake_data:
    dataset_tag = "nuwro_fake"
elif use_genie_v2_fake_data:
    dataset_tag = "genie_v2_fake"
else:
    dataset_tag = ""  # no dataset flag set: the name starts directly with "_<shape_type>"

binning_tag = "_2d" if collapse_2d else ("_1d" if collapse_1d else "_3d")

name = "trio_pickles/" + dataset_tag + "_" + shape_type + binning_tag

# Everything needed downstream: priors, data/Asimov results, and the three fake-data ensembles
tup = (

    trio_prior, cov_prior,

    data_constrained_trio, data_constrained_trio_cov,
    asimov_constrained_trio, asimov_constrained_trio_cov,

    cov_mat_true_trios, cov_mat_trios, cov_mat_trio_covs,
    xs_cov_mat_true_trios, xs_cov_mat_trios, xs_cov_mat_trio_covs,
    genie_true_trios, genie_trios, genie_trio_covs
)

with open(f'{name}.pkl', 'wb') as handle:
    pickle.dump(tup, handle)
