In [None]:
import numpy as np
import uproot as uproot
import uproot3 as uproot3
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

from tqdm.notebook import tqdm

from get_MA_trio_cov_mat import get_MA_trio_cov_mat_pred, extract_trio

import pickle


In [None]:
# True M_A values of the two generator-based fake data sets. They are used as the x positions
# of the highlighted points on the "True M_A (GeV/c^2)" axes further down.
nuwro_fake_data_true_MA = 1.03  # source: Afro, Slack, 2024_06_10
genie_v2_fake_data_true_MA = 0.99  # source: Nitish, Slack, 2023_04_25


In [None]:
# Global matplotlib default: 16 pt text for every figure created after this cell has run.
plt.rcParams["font.size"] = 16
# Size constant; not referenced by any of the plotting calls visible in this notebook section.
s = 20

def plot_bold_MA_value(true_MA, extracted_MA, extracted_MA_sigma, label, color, zorder):
    """Draw one highlighted (true M_A, extracted M_A) point with its error bar.

    The point is drawn twice on the current pyplot axes: once in ``color`` (this copy carries
    the legend entry) and once, slightly larger and thicker, in black directly underneath so
    that the black copy shows up as an outline.

    Parameters
    ----------
    true_MA : float
        x position of the point; also printed in the legend as "True".
    extracted_MA : float
        y position of the point.
    extracted_MA_sigma : float
        Symmetric y error bar; also the denominator of the pull quoted in the legend.
    label : str
        Prefix of the legend entry.
    color : matplotlib color
        Colour of the foreground marker and error bar.
    zorder : number
        zorder of the coloured copy. The black outline is drawn at ``zorder - 1`` so that it
        stays just underneath whatever layer the caller picks.
    """
    # |true - extracted| in units of the extracted uncertainty, quoted in the legend.
    n_sigma = np.abs(true_MA - extracted_MA) / extracted_MA_sigma
    legend_text = (
        label + f": {extracted_MA:.3f} "r"$\pm$"f" {extracted_MA_sigma:.3f} "
        r"$\mathrm{GeV/c^2}$"f"\n\t\t\tTrue: {true_MA:.3f} "r"$\mathrm{GeV/c^2}$""\n\t\t\t"
        fr"$\sigma = {n_sigma:.3f}$"
    )
    # Coloured foreground copy (owns the legend entry).
    plt.errorbar(true_MA, extracted_MA, yerr=extracted_MA_sigma, fmt="o", markersize=6, c=color,
                 capthick=2, elinewidth=2, capsize=5, zorder=zorder, label=legend_text)
    # Black outline copy, one layer below the coloured one.
    plt.errorbar(true_MA, extracted_MA, yerr=extracted_MA_sigma, fmt="o", markersize=8, c="k",
                 capthick=4, elinewidth=4, capsize=6, zorder=zorder - 1)

# Histogram binning: 21 equally spaced edges from -5 to +5 (20 bins).
bins = np.linspace(-5.0, 5.0, num=21)
bin_width = np.diff(bins[:2])[0]

# Fine grid over the same range, used to draw smooth reference curves.
x = np.linspace(-5.0, 5.0, num=1000)


# Nominal Setup

In [None]:
filename = "plots/fake_data_distribution"

# Number of randomly fluctuated fake data sets drawn per systematic source. Used for the draws,
# the matching legend entries and the normalisation of the reference Gaussian.
n_fake_data_sets = 600
# Dedicated, seeded generator: the fluctuation plots come out identical on every
# Restart Kernel -> Run All, and NumPy's global random state is left untouched.
rng = np.random.default_rng(42)

# --- GENIE v2 fake data ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
    data_type = "GENIE_v2"
)
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
# Element 0 of the constrained trio is used as M_A; its variance is the [0][0] covariance entry.
genie_v2_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
genie_v2_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

# --- NuWro setup: reused below for the Asimov test, the prior and all fluctuation tests ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
    data_type = "NuWro"
)

# Asimov test: the prediction itself (without its last three entries) is fed in as data.
asimov_data = pred_MA[:-3]
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, asimov_data)
asimov_data_extracted_MA_no_prior_removal = constrained_trio[0]
asimov_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
asimov_data_extracted_MA, asimov_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

# Prior on the trio: last three entries of the prediction and the matching covariance block.
trio_prior = pred_MA[-3:]
cov_prior = cov_MA[-3:, -3:]
prior_MA = trio_prior[0]
prior_MA_sigma = np.sqrt(cov_prior[0][0])
asimov_true_MA = prior_MA

# NuWro fake data
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
nuwro_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
nuwro_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

# --- Fluctuation tests (NuWro setup) ---
# Every fluctuated universe is a vector laid out like pred_MA: entry [-3] is taken as that
# universe's true M_A and everything before the last three entries is passed on as data.
# extract_trio is handed the whole batch and yields one 4-tuple per universe.

# 1) Universes thrown from the total covariance matrix.
total_sys_data_true_fluctuations = rng.multivariate_normal(pred_MA, cov_MA, size=n_fake_data_sets)
total_sys_true_MAs = np.array([universe[-3] for universe in total_sys_data_true_fluctuations])
total_sys_data_fluctuations = np.array([universe[:-3] for universe in total_sys_data_true_fluctuations])
extracted_outputs = list(extract_trio(cov_MA, pred_MA, total_sys_data_fluctuations))
total_sys_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
total_sys_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
total_sys_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
total_sys_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])

# 2) Universes thrown from the XS multisim covariance only (the extraction still uses cov_MA).
xs_sys_data_true_fluctuations = rng.multivariate_normal(pred_MA, multisim_xs_MA_cov, size=n_fake_data_sets)
xs_sys_true_MAs = np.array([universe[-3] for universe in xs_sys_data_true_fluctuations])
xs_sys_data_fluctuations = np.array([universe[:-3] for universe in xs_sys_data_true_fluctuations])
extracted_outputs = list(extract_trio(cov_MA, pred_MA, xs_sys_data_fluctuations))
xs_sys_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
xs_sys_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
xs_sys_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
xs_sys_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])

# 3) The variations returned by get_MA_trio_cov_mat_pred as xs_vars, used as they come.
# NOTE(review): the legend entries below say "600" for these too - confirm len(xs_vars) == 600.
genie_vars_true_fluctuations = xs_vars
genie_vars_true_MAs = np.array([universe[-3] for universe in genie_vars_true_fluctuations])
genie_vars_data_fluctuations = np.array([universe[:-3] for universe in genie_vars_true_fluctuations])
extracted_outputs = list(extract_trio(cov_MA, pred_MA, genie_vars_data_fluctuations))
genie_vars_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
genie_vars_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
genie_vars_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
genie_vars_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])


# --- Figure 1: true vs. extracted M_A (prior-removed values) ---
plt.figure(figsize=(10,7))
plt.plot([0.7, 1.5], [0.7, 1.5], c="k", ls="dashed", lw=2, zorder=5)
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs, yerr=total_sys_reco_MA_sigmas, fmt="o", markersize=4, c="tab:green", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from all systematic fluctuations")
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs, yerr=xs_sys_reco_MA_sigmas, fmt="o", markersize=4, c="tab:blue", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from XS systematic fluctuations")
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs, yerr=genie_vars_reco_MA_sigmas, fmt="o", markersize=4, c="tab:red", capsize=4, lw=1, capthick=1, zorder=0, label="600 GENIE multisim variations")
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA, asimov_data_extracted_MA_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.legend(loc="upper left", fontsize=8)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ (GeV/c$^2$)")
plt.xlim(0.7, 1.5)
plt.ylim(0.7, 1.5)
plt.savefig(filename + ".pdf")

# --- Figure 2: same comparison using the constrained (posterior) M_A, with the prior band ---
plt.figure(figsize=(10,7))
plt.fill_between([0.7, 1.5], [prior_MA - prior_MA_sigma, prior_MA - prior_MA_sigma], [prior_MA + prior_MA_sigma, prior_MA + prior_MA_sigma], color="gray", alpha=0.2, label="GENIE Prior")
plt.plot([0.7, 1.5], [0.7, 1.5], c="k", ls="dashed", lw=2, zorder=5)
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs_no_prior_removal, yerr=total_sys_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:green", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from all systematic fluctuations")
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs_no_prior_removal, yerr=xs_sys_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:blue", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from XS systematic fluctuations")
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs_no_prior_removal, yerr=genie_vars_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:red", capsize=4, lw=1, capthick=1, zorder=0, label="600 GENIE multisim variations")
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.legend(loc="upper left", fontsize=8)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ Posterior (GeV/c$^2$)")
plt.xlim(0.7, 1.5)
plt.ylim(0.7, 1.5)
plt.savefig(filename + "_no_prior_removal.pdf")

# --- Figure 3: pulls, (reco - true) / sigma, against a unit Gaussian ---
# The grid for the Gaussian is built here rather than read from the notebook-level `x`,
# which a later cell rebinds to ellipse coordinates.
pull_grid = np.linspace(bins[0], bins[-1], 1000)
plt.figure(figsize=(10,7))
plt.hist((total_sys_reco_MAs - total_sys_true_MAs) / total_sys_reco_MA_sigmas, bins=bins, histtype="step", lw=2, label=f"{n_fake_data_sets} fake data sets\nfrom covariance matrix variations")
plt.hist((xs_sys_reco_MAs - xs_sys_true_MAs) / xs_sys_reco_MA_sigmas, bins=bins, histtype="step", lw=2, label=f"{n_fake_data_sets} fake data sets\nfrom XS systematic fluctuations")
plt.hist((genie_vars_reco_MAs - genie_vars_true_MAs) / genie_vars_reco_MA_sigmas, bins=bins, histtype="step", lw=2, label="600 GENIE multisim variations")
# Unit Gaussian scaled by (entries x bin width) so it is comparable to the histogram counts.
plt.plot(pull_grid, n_fake_data_sets * bin_width * np.exp(-0.5 * pull_grid * pull_grid) / np.sqrt(2 * np.pi), c="k", ls="dashed", lw=2, label=f"Gaussian with area = {n_fake_data_sets}")
plt.ylabel("Number of Fake Data Universes")
plt.xlabel(r"Number of sigmas between reco and true $M_A$")
plt.legend()
plt.ylim(0, 400)
plt.savefig(filename + "_sigma_diffs.pdf")


# Shape-Only

In [None]:
# The two shape-only treatments go through exactly the same fake-data study, so they are run
# in one loop: (shape_type passed to get_MA_trio_cov_mat_pred, output file stem, plot title).
# After the loop every notebook-level name holds the values of the last setup
# ("matrix_breakdown").
shape_only_setups = [
    ("+100", "plots/fake_data_distribution_+100", "Shape-Only, +100% Normalization Uncertainty"),
    ("matrix_breakdown", "plots/fake_data_distribution_matrix_breakdown", "Shape-Only, Matrix Breakdown"),
]

# Number of randomly fluctuated fake data sets drawn per systematic source and per setup.
n_fake_data_sets = 600
# Dedicated, seeded generator: the fluctuation plots come out identical on every
# Restart Kernel -> Run All, and NumPy's global random state is left untouched.
rng = np.random.default_rng(42)

for shape_type, filename, plot_title in shape_only_setups:
    # --- GENIE v2 fake data ---
    cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
        data_type = "GENIE_v2",
        shape_type = shape_type
    )
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
    # Element 0 of the constrained trio is used as M_A; its variance is the [0][0] covariance entry.
    genie_v2_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
    genie_v2_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
    genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

    # --- NuWro setup: reused below for the Asimov test, the prior and all fluctuation tests ---
    cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
        data_type = "NuWro",
        shape_type = shape_type
    )

    # Asimov test: the prediction itself (without its last three entries) is fed in as data.
    asimov_data = pred_MA[:-3]
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, asimov_data)
    asimov_data_extracted_MA_no_prior_removal = constrained_trio[0]
    asimov_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
    asimov_data_extracted_MA, asimov_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

    # Prior on the trio: last three entries of the prediction and the matching covariance block.
    trio_prior = pred_MA[-3:]
    cov_prior = cov_MA[-3:, -3:]
    prior_MA = trio_prior[0]
    prior_MA_sigma = np.sqrt(cov_prior[0][0])
    asimov_true_MA = prior_MA

    # NuWro fake data
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
    nuwro_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
    nuwro_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
    nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

    # --- Fluctuation tests (NuWro setup) ---
    # Every fluctuated universe is a vector laid out like pred_MA: entry [-3] is taken as that
    # universe's true M_A and everything before the last three entries is passed on as data.
    # extract_trio is handed the whole batch and yields one 4-tuple per universe.

    # 1) Universes thrown from the total covariance matrix.
    total_sys_data_true_fluctuations = rng.multivariate_normal(pred_MA, cov_MA, size=n_fake_data_sets)
    total_sys_true_MAs = np.array([universe[-3] for universe in total_sys_data_true_fluctuations])
    total_sys_data_fluctuations = np.array([universe[:-3] for universe in total_sys_data_true_fluctuations])
    extracted_outputs = list(extract_trio(cov_MA, pred_MA, total_sys_data_fluctuations))
    total_sys_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    total_sys_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    total_sys_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    total_sys_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])

    # 2) Universes thrown from the XS multisim covariance only (the extraction still uses cov_MA).
    xs_sys_data_true_fluctuations = rng.multivariate_normal(pred_MA, multisim_xs_MA_cov, size=n_fake_data_sets)
    xs_sys_true_MAs = np.array([universe[-3] for universe in xs_sys_data_true_fluctuations])
    xs_sys_data_fluctuations = np.array([universe[:-3] for universe in xs_sys_data_true_fluctuations])
    extracted_outputs = list(extract_trio(cov_MA, pred_MA, xs_sys_data_fluctuations))
    xs_sys_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    xs_sys_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    xs_sys_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    xs_sys_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])

    # 3) The variations returned by get_MA_trio_cov_mat_pred as xs_vars, used as they come.
    # NOTE(review): the legend entries below say "600" for these too - confirm len(xs_vars) == 600.
    genie_vars_true_fluctuations = xs_vars
    genie_vars_true_MAs = np.array([universe[-3] for universe in genie_vars_true_fluctuations])
    genie_vars_data_fluctuations = np.array([universe[:-3] for universe in genie_vars_true_fluctuations])
    extracted_outputs = list(extract_trio(cov_MA, pred_MA, genie_vars_data_fluctuations))
    genie_vars_reco_MAs = np.array([ma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    genie_vars_reco_MA_sigmas = np.array([ma_sigma for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    genie_vars_reco_MAs_no_prior_removal = np.array([trio[0] for trio, trio_cov, ma, ma_sigma in extracted_outputs])
    genie_vars_reco_MA_sigmas_no_prior_removal = np.array([np.sqrt(trio_cov[0, 0]) for trio, trio_cov, ma, ma_sigma in extracted_outputs])

    # --- Figure 1: true vs. extracted M_A (prior-removed values) ---
    plt.figure(figsize=(10,7))
    plt.plot([0.7, 1.5], [0.7, 1.5], c="k", ls="dashed", lw=2, zorder=5)
    plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs, yerr=total_sys_reco_MA_sigmas, fmt="o", markersize=4, c="tab:green", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from all systematic fluctuations")
    plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs, yerr=xs_sys_reco_MA_sigmas, fmt="o", markersize=4, c="tab:blue", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from XS systematic fluctuations")
    plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs, yerr=genie_vars_reco_MA_sigmas, fmt="o", markersize=4, c="tab:red", capsize=4, lw=1, capthick=1, zorder=0, label="600 GENIE multisim variations")
    plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA, asimov_data_extracted_MA_sigma, "Asimov fake data", "tab:red", 10)
    plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma, "NuWro fake data", "tab:brown", 10)
    plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma, "GENIE v2 fake data", "tab:purple", 10)
    plt.title(plot_title)
    plt.legend(loc="upper left", fontsize=8)
    plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
    plt.ylabel(r"Reco $M_A$ (GeV/c$^2$)")
    plt.xlim(0.7, 1.5)
    plt.ylim(0.7, 1.5)
    plt.savefig(filename + ".pdf")

    # --- Figure 2: same comparison using the constrained (posterior) M_A, with the prior band ---
    plt.figure(figsize=(10,7))
    plt.fill_between([0.7, 1.5], [prior_MA - prior_MA_sigma, prior_MA - prior_MA_sigma], [prior_MA + prior_MA_sigma, prior_MA + prior_MA_sigma], color="gray", alpha=0.2, label="GENIE Prior")
    plt.plot([0.7, 1.5], [0.7, 1.5], c="k", ls="dashed", lw=2, zorder=5)
    plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs_no_prior_removal, yerr=total_sys_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:green", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from all systematic fluctuations")
    plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs_no_prior_removal, yerr=xs_sys_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:blue", capsize=4, lw=1, capthick=1, zorder=0, label=f"{n_fake_data_sets} fake data sets from XS systematic fluctuations")
    plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs_no_prior_removal, yerr=genie_vars_reco_MA_sigmas_no_prior_removal, fmt="o", markersize=4, c="tab:red", capsize=4, lw=1, capthick=1, zorder=0, label="600 GENIE multisim variations")
    plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma, "Asimov fake data", "tab:red", 10)
    plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma, "NuWro fake data", "tab:brown", 10)
    plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma, "GENIE v2 fake data", "tab:purple", 10)
    plt.title(plot_title)
    plt.legend(loc="upper left", fontsize=8)
    plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
    plt.ylabel(r"Reco $M_A$ Posterior (GeV/c$^2$)")
    plt.xlim(0.7, 1.5)
    plt.ylim(0.7, 1.5)
    plt.savefig(filename + "_no_prior_removal.pdf")


In [None]:
def get_ellipse_x_y_points_from_pair_cov(pair, cov, sigma=2):
    """Return 100 points tracing the ``sigma``-sigma ellipse described by a 2x2 covariance.

    Parameters
    ----------
    pair : sequence of two numbers
        Centre of the ellipse. ``pair[0]`` is the centre along the returned y coordinate and
        ``pair[1]`` the centre along the returned x coordinate.
    cov : 2x2 array-like
        Covariance matrix in the same ordering as ``pair`` (index 0 -> y, index 1 -> x).
    sigma : number, optional
        Scale of the ellipse: each semi-axis is ``sigma * sqrt(eigenvalue)``.

    Returns
    -------
    (ellipse_points_x, ellipse_points_y) : tuple of two arrays of length 100
        A closed curve; the first and last points coincide.
    """
    cov = np.asarray(cov, dtype=float)
    # A covariance matrix is symmetric, so use the symmetric solver: it always returns real
    # eigenvalues and orthonormal eigenvectors. Averaging with the transpose removes any
    # round-off asymmetry in the input first.
    eigenvals, eigenvecs = np.linalg.eigh(0.5 * (cov + cov.T))
    # Round-off can push a (near-)zero eigenvalue slightly negative; clip it so that the
    # square root yields a collapsed axis rather than NaN.
    semi_axes = sigma * np.sqrt(np.clip(eigenvals, 0.0, None))
    t = np.linspace(0, 2*np.pi, 100)
    # Columns of `eigenvecs` are the eigenvectors. Row 1 holds their x components and
    # row 0 their y components, matching the ordering of `pair`.
    ellipse_points_x = (pair[1]
                        + np.cos(t) * semi_axes[0] * eigenvecs[1, 0]
                        + np.sin(t) * semi_axes[1] * eigenvecs[1, 1])
    ellipse_points_y = (pair[0]
                        + np.cos(t) * semi_axes[0] * eigenvecs[0, 0]
                        + np.sin(t) * semi_axes[1] * eigenvecs[0, 1])
    return ellipse_points_x, ellipse_points_y


# Posterior (M_A, lambda) pair and its 2x2 covariance for every fake data set and fit setup.
# Entries 0 and 2 of the constrained trio are the two parameters plotted below (M_A on y,
# lambda on x). The keyword arguments are forwarded unchanged to get_MA_trio_cov_mat_pred;
# the nominal setup passes no shape_type at all.
fit_setups = (
    ("nominal", {}),
    ("plus100", {"shape_type": "+100"}),
    ("matrix_breakdown", {"shape_type": "matrix_breakdown"}),
)
posterior_pairs = {}
for data_type in ("GENIE_v2", "NuWro"):
    for setup_name, setup_kwargs in fit_setups:
        cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
            data_type=data_type, **setup_kwargs
        )
        constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
        posterior_pairs[(data_type, setup_name)] = (
            [constrained_trio[0], constrained_trio[2]],
            [[constrained_trio_cov[0, 0], constrained_trio_cov[0, 2]],
             [constrained_trio_cov[2, 0], constrained_trio_cov[2, 2]]],
        )
        if (data_type, setup_name) == ("NuWro", "nominal"):
            # Prior pair and covariance: the matching entries of the last three elements of
            # the nominal NuWro prediction and covariance.
            prior_pair = [pred_MA[-3:][0], pred_MA[-3:][2]]
            prior_pair_cov = [[cov_MA[-3, -3], cov_MA[-3, -1]],
                              [cov_MA[-1, -3], cov_MA[-1, -1]]]

# Individually named results, kept available under their established names.
genie_v2_pair, genie_v2_pair_cov = posterior_pairs[("GENIE_v2", "nominal")]
genie_v2_plus100_pair, genie_v2_plus100_pair_cov = posterior_pairs[("GENIE_v2", "plus100")]
genie_v2_matrix_breakdown_pair, genie_v2_matrix_breakdown_pair_cov = posterior_pairs[("GENIE_v2", "matrix_breakdown")]
nuwro_pair, nuwro_pair_cov = posterior_pairs[("NuWro", "nominal")]
nuwro_plus100_pair, nuwro_plus100_pair_cov = posterior_pairs[("NuWro", "plus100")]
nuwro_matrix_breakdown_pair, nuwro_matrix_breakdown_pair_cov = posterior_pairs[("NuWro", "matrix_breakdown")]


# NOTE: these rcParams changes are global and stay in effect for every later figure.
plt.rcParams.update({'font.size': 14})
plt.rcParams['lines.linewidth'] = 3

# (key into fit_setups, legend prefix, colour) in drawing order.
posterior_styles = (
    ("nominal", "Shape+rate", "tab:orange"),
    ("plus100", "Shape-Only (+100%)", "tab:red"),
    ("matrix_breakdown", "Shape-Only (Matrix Breakdown)", "tab:green"),
)

for data_type, plot_title, filename in (
    ("NuWro", "NuWro Fake Data", "plots/M_A_vs_lambda_nuwro"),
    ("GENIE_v2", "GENIE v2 Fake Data", "plots/M_A_vs_lambda_genie"),
):
    plt.figure(figsize=(10,7))

    # Prior: the star is drawn at prior_pair so that it always sits at the centre of the
    # prior ellipse, and the legend quotes the same numbers.
    plt.scatter([prior_pair[1]], [prior_pair[0]], marker="*", s=200,
                label=f"Prior best-fit, ({prior_pair[1]:g}, {prior_pair[0]:g})",
                edgecolors="k", c="tab:blue", zorder=10)
    # Dedicated names for the ellipse coordinates, so the notebook-level `x` grid used by the
    # pull histogram is not overwritten.
    ellipse_x, ellipse_y = get_ellipse_x_y_points_from_pair_cov(prior_pair, prior_pair_cov, sigma=2)
    plt.plot(ellipse_x, ellipse_y, label=r"Prior 2$\sigma$ Ellipse", c="tab:blue")

    # Posterior best-fit star and 2-sigma ellipse for each fit setup.
    for setup_name, setup_label, color in posterior_styles:
        pair, pair_cov = posterior_pairs[(data_type, setup_name)]
        plt.scatter([pair[1]], [pair[0]], marker="*", s=200,
                    label=f"{setup_label} posterior best-fit, ({pair[1]:.3f}, {pair[0]:.3f})",
                    edgecolors="k", c=color, zorder=10)
        ellipse_x, ellipse_y = get_ellipse_x_y_points_from_pair_cov(pair, pair_cov, sigma=2)
        plt.plot(ellipse_x, ellipse_y, label=rf"{setup_label} 2$\sigma$ Ellipse", c=color)

    plt.xlabel(r"$\lambda$""\n(total events / CV events)\n(proportional to total cross-section)")
    plt.ylabel(r"$M_A$ (GeV/c$^2$)")

    plt.legend()

    plt.ylim((0.4, 2.5))

    plt.title(plot_title)

    plt.savefig(filename + ".pdf")


# Collapsing Dimensions

In [None]:
filename = "plots/fake_data_distribution_2D"

# GENIE v2 fake data (2D collapse)
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
    data_type = "GENIE_v2",
    collapse_type="2D"
)
# extract_trio's result is unpacked as (constrained trio, its covariance,
# prior-removed M_A, prior-removed M_A sigma); entry 0 of the trio is used as M_A.
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma = constrained_trio[0], np.sqrt(constrained_trio_cov[0][0])
genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

# NuWro fake data (2D collapse)
# Fix: this call used to pass shape_type="+100" and no collapse_type, so the 2D
# collapse was never requested for the NuWro setup. Everything below (Asimov,
# NuWro, prior and the fluctuation tests that reuse cov_MA / pred_MA) was
# therefore inconsistent with the GENIE v2 point, the plot title and the
# "_2D" output filename.
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(
    data_type = "NuWro",
    collapse_type="2D"
)
# using NuWro setup for Asimov data and setting prior variables
# Asimov data = the prediction itself, without the last three (trio) entries.
asimov_data = pred_MA[:-3]
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, asimov_data)
asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma = constrained_trio[0], np.sqrt(constrained_trio_cov[0][0])
asimov_data_extracted_MA, asimov_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma
# The prior trio is the last three entries of pred_MA, with the matching
# bottom-right 3x3 block of cov_MA as its covariance.
trio_prior = pred_MA[-3:]
cov_prior = cov_MA[-3:, -3:]
prior_MA = trio_prior[0]
prior_MA_sigma = np.sqrt(cov_prior[0][0])
# For Asimov data the "true" M_A is taken to be the prior value.
asimov_true_MA = prior_MA
# NuWro fake data
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma = constrained_trio[0], np.sqrt(constrained_trio_cov[0][0])
nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma = prior_removed_MA, prior_removed_MA_sigma

# Fluctuation fake-data tests, all extracted with the NuWro-setup cov_MA / pred_MA.
# NOTE(review): no RNG seed is set in this cell, so the thrown points presumably
# differ from run to run unless a seed is set earlier in the notebook.

# (1) 600 throws of the full vector around pred_MA using the total covariance.
total_sys_data_true_fluctuations = np.random.multivariate_normal(pred_MA, cov_MA, size=600)
# Third-from-last column is the thrown M_A; everything before the last three
# columns is handed to extract_trio as the fake data.
total_sys_true_MAs = total_sys_data_true_fluctuations[:, -3].copy()
total_sys_data_fluctuations = total_sys_data_true_fluctuations[:, :-3].copy()
total_sys_reco_MAs, total_sys_reco_MA_sigmas = [], []
total_sys_reco_MAs_no_prior_removal, total_sys_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, total_sys_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    total_sys_reco_MAs_no_prior_removal.append(constrained_trio[0])
    total_sys_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    total_sys_reco_MAs.append(prior_removed_MA)
    total_sys_reco_MA_sigmas.append(prior_removed_MA_sigma)

# (2) 600 throws using only multisim_xs_MA_cov; the extraction still uses cov_MA.
xs_sys_data_true_fluctuations = np.random.multivariate_normal(pred_MA, multisim_xs_MA_cov, size=600)
xs_sys_true_MAs = xs_sys_data_true_fluctuations[:, -3].copy()
xs_sys_data_fluctuations = xs_sys_data_true_fluctuations[:, :-3].copy()
xs_sys_reco_MAs, xs_sys_reco_MA_sigmas = [], []
xs_sys_reco_MAs_no_prior_removal, xs_sys_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, xs_sys_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    xs_sys_reco_MAs_no_prior_removal.append(constrained_trio[0])
    xs_sys_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    xs_sys_reco_MAs.append(prior_removed_MA)
    xs_sys_reco_MA_sigmas.append(prior_removed_MA_sigma)


# (3) The variations returned as xs_vars, split the same way as the throws above.
genie_vars_true_fluctuations = xs_vars
genie_vars_true_MAs = np.array([variation[-3] for variation in genie_vars_true_fluctuations])
genie_vars_data_fluctuations = np.array([variation[:-3] for variation in genie_vars_true_fluctuations])
genie_vars_reco_MAs, genie_vars_reco_MA_sigmas = [], []
genie_vars_reco_MAs_no_prior_removal, genie_vars_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, genie_vars_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    genie_vars_reco_MAs_no_prior_removal.append(constrained_trio[0])
    genie_vars_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    genie_vars_reco_MAs.append(prior_removed_MA)
    genie_vars_reco_MA_sigmas.append(prior_removed_MA_sigma)


# Shared styling for the three clouds of fluctuated points, and the true-M_A span
# used for the diagonal, the prior band and the x-limits.
fluctuation_style = dict(fmt="o", markersize=4, capsize=4, lw=1, capthick=1, zorder=0)
true_MA_span = [0.7, 1.5]

# --- Figure 1: prior-removed extracted M_A vs. true M_A ---
plt.figure(figsize=(10,7))
plt.plot(true_MA_span, true_MA_span, c="k", ls="dashed", lw=2, zorder=5)  # reco == true diagonal
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs, yerr=total_sys_reco_MA_sigmas, c="tab:green",
             label="600 fake data sets from all systematic fluctuations", **fluctuation_style)
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs, yerr=xs_sys_reco_MA_sigmas, c="tab:blue",
             label="600 fake data sets from XS systematic fluctuations", **fluctuation_style)
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs, yerr=genie_vars_reco_MA_sigmas, c="tab:red",
             label="600 GENIE multisim variations", **fluctuation_style)
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA, asimov_data_extracted_MA_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ (GeV/c$^2$)")
plt.title("Collapsing to 2D, Muon Momentum and Muon Angle")
plt.legend(loc="upper left", fontsize=8)
plt.xlim(*true_MA_span)
plt.ylim(0.5, 1.7)
plt.savefig(filename + ".pdf")

# --- Figure 2: posterior (no prior removal) vs. true M_A, with the prior +/- 1 sigma band ---
prior_band_low = prior_MA - prior_MA_sigma
prior_band_high = prior_MA + prior_MA_sigma
plt.figure(figsize=(10,7))
plt.fill_between(true_MA_span, [prior_band_low, prior_band_low], [prior_band_high, prior_band_high], color="gray", alpha=0.2, label="GENIE Prior")
plt.plot(true_MA_span, true_MA_span, c="k", ls="dashed", lw=2, zorder=5)
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs_no_prior_removal, yerr=total_sys_reco_MA_sigmas_no_prior_removal, c="tab:green",
             label="600 fake data sets from all systematic fluctuations", **fluctuation_style)
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs_no_prior_removal, yerr=xs_sys_reco_MA_sigmas_no_prior_removal, c="tab:blue",
             label="600 fake data sets from XS systematic fluctuations", **fluctuation_style)
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs_no_prior_removal, yerr=genie_vars_reco_MA_sigmas_no_prior_removal, c="tab:red",
             label="600 GENIE multisim variations", **fluctuation_style)
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ Posterior (GeV/c$^2$)")
plt.title("Collapsing to 2D, Muon Momentum and Muon Angle")
plt.legend(loc="upper left", fontsize=8)
plt.xlim(*true_MA_span)
plt.ylim(0.5, 1.7)
plt.savefig(filename + "_no_prior_removal.pdf")


filename = "plots/fake_data_distribution_1D"

# --- GENIE v2 fake data, 1D collapse ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(data_type="GENIE_v2", collapse_type="1D")
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
# Posterior (prior not removed): entry 0 of the constrained trio and its variance.
genie_v2_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
genie_v2_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
# Prior-removed result.
genie_v2_fake_data_extracted_MA = prior_removed_MA
genie_v2_fake_data_extracted_MA_sigma = prior_removed_MA_sigma

# --- NuWro setup, 1D collapse (also reused for the Asimov test and the prior) ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(data_type="NuWro", collapse_type="1D")

# Asimov test: feed the prediction (minus its last three entries) back in as data.
asimov_data = pred_MA[:-3]
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, asimov_data)
asimov_data_extracted_MA_no_prior_removal = constrained_trio[0]
asimov_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
asimov_data_extracted_MA = prior_removed_MA
asimov_data_extracted_MA_sigma = prior_removed_MA_sigma

# Prior: last three entries of pred_MA and the matching 3x3 corner of cov_MA.
trio_prior = pred_MA[-3:]
cov_prior = cov_MA[-3:, -3:]
prior_MA = trio_prior[0]
prior_MA_sigma = np.sqrt(cov_prior[0][0])
asimov_true_MA = prior_MA  # the Asimov "truth" is the prior value

# NuWro fake data itself.
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
nuwro_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
nuwro_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
nuwro_fake_data_extracted_MA = prior_removed_MA
nuwro_fake_data_extracted_MA_sigma = prior_removed_MA_sigma

# Fluctuation fake-data tests, all extracted with the NuWro-setup cov_MA / pred_MA.
# NOTE(review): no RNG seed is set in this cell, so the thrown points presumably
# differ from run to run unless a seed is set earlier in the notebook.

# (1) 600 throws of the full vector around pred_MA using the total covariance.
total_sys_data_true_fluctuations = np.random.multivariate_normal(pred_MA, cov_MA, size=600)
# Third-from-last column is the thrown M_A; everything before the last three
# columns is handed to extract_trio as the fake data.
total_sys_true_MAs = total_sys_data_true_fluctuations[:, -3].copy()
total_sys_data_fluctuations = total_sys_data_true_fluctuations[:, :-3].copy()
total_sys_reco_MAs, total_sys_reco_MA_sigmas = [], []
total_sys_reco_MAs_no_prior_removal, total_sys_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, total_sys_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    total_sys_reco_MAs_no_prior_removal.append(constrained_trio[0])
    total_sys_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    total_sys_reco_MAs.append(prior_removed_MA)
    total_sys_reco_MA_sigmas.append(prior_removed_MA_sigma)

# (2) 600 throws using only multisim_xs_MA_cov; the extraction still uses cov_MA.
xs_sys_data_true_fluctuations = np.random.multivariate_normal(pred_MA, multisim_xs_MA_cov, size=600)
xs_sys_true_MAs = xs_sys_data_true_fluctuations[:, -3].copy()
xs_sys_data_fluctuations = xs_sys_data_true_fluctuations[:, :-3].copy()
xs_sys_reco_MAs, xs_sys_reco_MA_sigmas = [], []
xs_sys_reco_MAs_no_prior_removal, xs_sys_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, xs_sys_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    xs_sys_reco_MAs_no_prior_removal.append(constrained_trio[0])
    xs_sys_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    xs_sys_reco_MAs.append(prior_removed_MA)
    xs_sys_reco_MA_sigmas.append(prior_removed_MA_sigma)


# (3) The variations returned as xs_vars, split the same way as the throws above.
genie_vars_true_fluctuations = xs_vars
genie_vars_true_MAs = np.array([variation[-3] for variation in genie_vars_true_fluctuations])
genie_vars_data_fluctuations = np.array([variation[:-3] for variation in genie_vars_true_fluctuations])
genie_vars_reco_MAs, genie_vars_reco_MA_sigmas = [], []
genie_vars_reco_MAs_no_prior_removal, genie_vars_reco_MA_sigmas_no_prior_removal = [], []
extracted_outputs = extract_trio(cov_MA, pred_MA, genie_vars_data_fluctuations)
for tup in extracted_outputs:
    constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = tup
    genie_vars_reco_MAs_no_prior_removal.append(constrained_trio[0])
    genie_vars_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))
    genie_vars_reco_MAs.append(prior_removed_MA)
    genie_vars_reco_MA_sigmas.append(prior_removed_MA_sigma)


# Shared styling for the three clouds of fluctuated points, and the true-M_A span
# used for the diagonal, the prior band and the x-limits.
fluctuation_style = dict(fmt="o", markersize=4, capsize=4, lw=1, capthick=1, zorder=0)
true_MA_span = [0.7, 1.5]

# --- Figure 1: prior-removed extracted M_A vs. true M_A ---
plt.figure(figsize=(10,7))
plt.plot(true_MA_span, true_MA_span, c="k", ls="dashed", lw=2, zorder=5)  # reco == true diagonal
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs, yerr=total_sys_reco_MA_sigmas, c="tab:green",
             label="600 fake data sets from all systematic fluctuations", **fluctuation_style)
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs, yerr=xs_sys_reco_MA_sigmas, c="tab:blue",
             label="600 fake data sets from XS systematic fluctuations", **fluctuation_style)
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs, yerr=genie_vars_reco_MA_sigmas, c="tab:red",
             label="600 GENIE multisim variations", **fluctuation_style)
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA, asimov_data_extracted_MA_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ (GeV/c$^2$)")
plt.title("Collapsing to 1D, Muon Momentum")
plt.legend(loc="upper left", fontsize=8)
plt.xlim(*true_MA_span)
plt.ylim(0.5, 1.7)
plt.savefig(filename + ".pdf")

# --- Figure 2: posterior (no prior removal) vs. true M_A, with the prior +/- 1 sigma band ---
prior_band_low = prior_MA - prior_MA_sigma
prior_band_high = prior_MA + prior_MA_sigma
plt.figure(figsize=(10,7))
plt.fill_between(true_MA_span, [prior_band_low, prior_band_low], [prior_band_high, prior_band_high], color="gray", alpha=0.2, label="GENIE Prior")
plt.plot(true_MA_span, true_MA_span, c="k", ls="dashed", lw=2, zorder=5)
plt.errorbar(total_sys_true_MAs, total_sys_reco_MAs_no_prior_removal, yerr=total_sys_reco_MA_sigmas_no_prior_removal, c="tab:green",
             label="600 fake data sets from all systematic fluctuations", **fluctuation_style)
plt.errorbar(xs_sys_true_MAs, xs_sys_reco_MAs_no_prior_removal, yerr=xs_sys_reco_MA_sigmas_no_prior_removal, c="tab:blue",
             label="600 fake data sets from XS systematic fluctuations", **fluctuation_style)
plt.errorbar(genie_vars_true_MAs, genie_vars_reco_MAs_no_prior_removal, yerr=genie_vars_reco_MA_sigmas_no_prior_removal, c="tab:red",
             label="600 GENIE multisim variations", **fluctuation_style)
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ Posterior (GeV/c$^2$)")
plt.title("Collapsing to 1D, Muon Momentum")
plt.legend(loc="upper left", fontsize=8)
plt.xlim(*true_MA_span)
plt.ylim(0.5, 1.7)
plt.savefig(filename + "_no_prior_removal.pdf")


# M_A vs MEC

In [None]:
filename = "plots/fake_data_M_A_MEC"

# --- GENIE v2 fake data: keep the constrained trio and its covariance ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(data_type="GENIE_v2")
constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
genie_v2_constrained_trio, genie_v2_constrained_trio_cov = constrained_trio, constrained_trio_cov
# Drop the last trio entry (and its row/column) to get the pair that is plotted.
genie_v2_pair = genie_v2_constrained_trio[:-1]
genie_v2_pair_cov = genie_v2_constrained_trio_cov[:-1,:-1]

# --- NuWro fake data; the prior is also read off this setup ---
cov_MA, pred_MA, data, multisim_xs_MA_cov, xs_vars = get_MA_trio_cov_mat_pred(data_type="NuWro")
# Prior trio: last three entries of pred_MA and the matching 3x3 corner of cov_MA.
trio_prior, cov_prior = pred_MA[-3:], cov_MA[-3:, -3:]
prior_pair = trio_prior[:-1]
prior_pair_cov = cov_prior[:-1,:-1]

constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma = extract_trio(cov_MA, pred_MA, data)
nuwro_constrained_trio, nuwro_constrained_trio_cov = constrained_trio, constrained_trio_cov
nuwro_pair = nuwro_constrained_trio[:-1]
nuwro_pair_cov = nuwro_constrained_trio_cov[:-1,:-1]


# NOTE: these rcParams changes are global; they stay in effect for any figure
# drawn after this point in the kernel session.
plt.rcParams.update({'font.size': 14})
plt.rcParams['lines.linewidth'] = 3

plt.figure(figsize=(10,7))

# For every pair, pair[-1] goes on the x-axis (labelled NormCCMEC) and pair[-2]
# on the y-axis (labelled M_A).
# NOTE(review): the coordinates in the prior label are hard-coded rather than
# formatted from prior_pair -- confirm they match the plotted point.
plt.scatter(prior_pair[-1], prior_pair[-2], marker="*", s=200, label="Prior best-fit, (1.66, 1.1)", edgecolors="k", c="tab:blue")
x, y = get_ellipse_x_y_points_from_pair_cov(prior_pair, prior_pair_cov, sigma=2)
# Raw strings for the ellipse labels: "\s" is not a valid escape sequence in a
# normal string literal; the rendered text is unchanged.
plt.plot(x, y, label=r"Prior 2$\sigma$ Ellipse", c="tab:blue")

plt.scatter(nuwro_pair[-1], nuwro_pair[-2], marker="*", s=200, label=f"NuWro Fake Data Posterior best-fit, ({nuwro_pair[-1]:.2f}, {nuwro_pair[-2]:.2f})", edgecolors="k", c="tab:orange")
x, y = get_ellipse_x_y_points_from_pair_cov(nuwro_pair, nuwro_pair_cov, sigma=2)
plt.plot(x, y, label=r"NuWro Fake Data Posterior 2$\sigma$ Ellipse", c="tab:orange")

plt.scatter(genie_v2_pair[-1], genie_v2_pair[-2], marker="*", s=200, label=f"GENIE v2 Fake Data Posterior best-fit, ({genie_v2_pair[-1]:.2f}, {genie_v2_pair[-2]:.2f})", edgecolors="k", c="tab:green")
x, y = get_ellipse_x_y_points_from_pair_cov(genie_v2_pair, genie_v2_pair_cov, sigma=2)
plt.plot(x, y, label=r"GENIE v2 Fake Data Posterior 2$\sigma$ Ellipse", c="tab:green")


plt.xlabel(r"NormCCMEC")
plt.ylabel(r"$M_A$ (GeV/c$^2$)")

lgnd = plt.legend()
# Shrink the star markers shown in the legend. The previous code poked the
# private `_sizes` attribute of handles [0] and [1] only; with three scatter
# entries that leaves at least one star at full size, and an index that lands
# on a line handle is a silent no-op. Select the scatter handles by capability
# instead of by position and use the public setter.
for legend_handle in lgnd.legend_handles:
    if hasattr(legend_handle, "set_sizes"):
        legend_handle.set_sizes([50])

plt.ylim((0.5, 2))

plt.savefig(filename + ".pdf")
