In [None]:
import numpy as np
import uproot as uproot
import uproot3 as uproot3
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

from tqdm.notebook import tqdm

from get_MA_trio_cov_mat import get_prediction_cv_and_variations_dataframes, get_MA_trio_cov_mat_pred, extract_trio

import pickle


In [None]:
# True M_A values of the two generator fake-data samples. They are used as the
# "True M_A" x-coordinates of the highlighted points in the comparison plots,
# whose axes are labelled in GeV/c^2.
nuwro_fake_data_true_MA = 1.03 # From Afro slack 2024_06_10
genie_v2_fake_data_true_MA = 0.99 # Nitish slack 2023_04_25


# Generator Fake Data Extractions

In [None]:
# ---------------------------------------------------------------------------
# GENIE v2 fake data
# ---------------------------------------------------------------------------
cov_MA, pred_MA, data = get_MA_trio_cov_mat_pred(use_genie_v2_fake_data=True)
(
    constrained_trio,
    constrained_trio_cov,
    prior_removed_MA,
    prior_removed_MA_sigma,
) = extract_trio(cov_MA, pred_MA, data)
# First trio entry and the square root of its variance, before prior removal.
genie_v2_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
genie_v2_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
# Prior-removed value and uncertainty exactly as returned by extract_trio.
genie_v2_fake_data_extracted_MA = prior_removed_MA
genie_v2_fake_data_extracted_MA_sigma = prior_removed_MA_sigma

# ---------------------------------------------------------------------------
# NuWro configuration (no reweighting of the fake data).
# From here on cov_MA, pred_MA and data refer to this configuration.
# ---------------------------------------------------------------------------
cov_MA, pred_MA, data = get_MA_trio_cov_mat_pred(
    use_nuwro_fake_data=True,
    reweight_nuwro_fake_data=False,
)

# Asimov data: the prediction itself (everything except its last three
# entries) is fed back in as the data vector.
asimov_data = pred_MA[:-3]
(
    constrained_trio,
    constrained_trio_cov,
    prior_removed_MA,
    prior_removed_MA_sigma,
) = extract_trio(cov_MA, pred_MA, asimov_data)
asimov_data_extracted_MA_no_prior_removal = constrained_trio[0]
asimov_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
asimov_data_extracted_MA = prior_removed_MA
asimov_data_extracted_MA_sigma = prior_removed_MA_sigma

# Prior on the trio: the last three entries of the prediction and the matching
# bottom-right 3x3 block of the covariance. The first trio entry is M_A, and
# its prior central value doubles as the "true" M_A of the Asimov sample.
trio_prior = pred_MA[-3:]
cov_prior = cov_MA[-3:, -3:]
prior_MA = trio_prior[0]
prior_MA_sigma = np.sqrt(cov_prior[0][0])
asimov_true_MA = prior_MA

# NuWro fake data, extracted with the same covariance and prediction.
(
    constrained_trio,
    constrained_trio_cov,
    prior_removed_MA,
    prior_removed_MA_sigma,
) = extract_trio(cov_MA, pred_MA, data)
nuwro_fake_data_extracted_MA_no_prior_removal = constrained_trio[0]
nuwro_fake_data_extracted_MA_no_prior_removal_sigma = np.sqrt(constrained_trio_cov[0][0])
nuwro_fake_data_extracted_MA = prior_removed_MA
nuwro_fake_data_extracted_MA_sigma = prior_removed_MA_sigma


# Variation Fake Data Samples

In [None]:
# Draw fake data sets by fluctuating the full prediction vector (data bins
# followed by the three trio entries) according to the total covariance.
# A dedicated, seeded generator keeps the sample -- and every plot made from
# it -- reproducible under Restart & Run All without touching NumPy's global
# random state.
N_TOTAL_SYS_FLUCTUATIONS = 600
TOTAL_SYS_FLUCTUATION_SEED = 42
total_sys_rng = np.random.default_rng(TOTAL_SYS_FLUCTUATION_SEED)
total_sys_data_true_fluctuations = total_sys_rng.multivariate_normal(
    pred_MA, cov_MA, size=N_TOTAL_SYS_FLUCTUATIONS
)

# Third-from-last entry of each draw is taken as that draw's true M_A; the
# entries before the last three form the fluctuated data vector.
total_sys_true_MAs = [draw[-3] for draw in total_sys_data_true_fluctuations]
# Kept as a Python list of 1-D arrays: that is the container extract_trio is
# handed here, and how it treats other container types is not visible from
# this notebook.
total_sys_data_fluctuations = [draw[:-3] for draw in total_sys_data_true_fluctuations]

total_sys_reco_MAs = []
total_sys_reco_MA_sigmas = []
total_sys_reco_MAs_no_prior_removal = []
total_sys_reco_MA_sigmas_no_prior_removal = []

# Given the list of data vectors, the result is iterated as one
# (trio, trio covariance, prior-removed M_A, prior-removed sigma) tuple per
# fake data set.
extracted_outputs = extract_trio(cov_MA, pred_MA, total_sys_data_fluctuations)

for constrained_trio, constrained_trio_cov, prior_removed_MA, prior_removed_MA_sigma in extracted_outputs:
    total_sys_reco_MAs.append(prior_removed_MA)
    total_sys_reco_MA_sigmas.append(prior_removed_MA_sigma)
    total_sys_reco_MAs_no_prior_removal.append(constrained_trio[0])
    total_sys_reco_MA_sigmas_no_prior_removal.append(np.sqrt(constrained_trio_cov[0,0]))


In [None]:
# Global matplotlib font size for every figure drawn after this point.
plt.rcParams["font.size"] = 10
# The plotting code in this cell passes its marker sizes as literals, so `s`
# is kept only for any code outside this view that may read it.
s = 20

def plot_bold_MA_value(true_MA, extracted_MA, extracted_MA_sigma, label, color, zorder):
    """Draw one highlighted (true M_A, extracted M_A) point on the current axes.

    The point and its vertical error bar are drawn in ``color`` at ``zorder``,
    on top of slightly larger black copies at z-order 9 that act as an outline.
    Only the coloured marker carries a legend entry.

    Parameters
    ----------
    true_MA : x-coordinate of the point.
    extracted_MA : y-coordinate of the point.
    extracted_MA_sigma : half-length of the vertical error bar.
    label : prefix of the legend entry; the extracted value, its uncertainty,
        the true value and the deviation in units of sigma are appended.
    color : matplotlib colour of the foreground marker and error bar.
    zorder : z-order of the foreground marker and error bar.
    """
    # Absolute deviation from the true value in units of the extracted uncertainty.
    n_sigma = np.abs(true_MA - extracted_MA) / extracted_MA_sigma
    unit = r"$\mathrm{GeV/c^2}$"
    legend_entry = (
        label
        + f": {extracted_MA:.3f} "
        + r"$\pm$"
        + f" {extracted_MA_sigma:.3f} "
        + unit
        + f"\n\t\t\tTrue: {true_MA:.3f} "
        + unit
        + "\n\t\t\t"
        + fr"$\sigma = {n_sigma:.3f}$"
    )

    # Markers: coloured foreground first, then the larger black outline.
    plt.scatter(true_MA, extracted_MA, s=30, label=legend_entry, c=color, zorder=zorder)
    plt.scatter(true_MA, extracted_MA, s=70, c="k", zorder=9)

    # Error bars: the same foreground/outline pair, the outline being thicker.
    for bar_color, bar_width, cap_size, bar_zorder in (
        (color, 2, 5, zorder),
        ("k", 4, 6, 9),
    ):
        plt.errorbar(
            true_MA,
            extracted_MA,
            yerr=extracted_MA_sigma,
            fmt="none",
            c=bar_color,
            capthick=bar_width,
            elinewidth=bar_width,
            capsize=cap_size,
            zorder=bar_zorder,
        )


# Figure 1: prior-removed extracted M_A against the true M_A.
plt.figure(figsize=(10,7))
# Dashed diagonal marks extracted == true.
plt.plot([0.7, 1.5], [0.7, 1.5], c="k", ls="dashed", lw=2, zorder=5)

# Systematic-fluctuation fake data sets, drawn with a single scatter/errorbar
# pair instead of one pair of calls per point. np.ravel flattens the value
# lists to 1-D so scalar and length-1 array entries are handled alike.
sys_true_MAs = np.ravel(total_sys_true_MAs)
sys_reco_MAs = np.ravel(total_sys_reco_MAs)
sys_reco_MA_sigmas = np.ravel(total_sys_reco_MA_sigmas)
plt.scatter(
    sys_true_MAs,
    sys_reco_MAs,
    c="tab:green",
    # This label used to be built but never handed to matplotlib, which left
    # the sample out of the legend. The count comes from the data itself.
    label=f"{len(sys_true_MAs)} fake data sets from all systematic fluctuations",
)
# fmt="none" draws only the error bars, with no line joining the points.
plt.errorbar(sys_true_MAs, sys_reco_MAs, yerr=sys_reco_MA_sigmas, fmt="none", c="tab:green")

# Highlighted generator / Asimov samples, drawn above everything else.
plot_bold_MA_value(asimov_true_MA, asimov_data_extracted_MA, asimov_data_extracted_MA_sigma, "Asimov fake data", "tab:red", 10)
plot_bold_MA_value(nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA, nuwro_fake_data_extracted_MA_sigma, "NuWro fake data", "tab:brown", 10)
plot_bold_MA_value(genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA, genie_v2_fake_data_extracted_MA_sigma, "GENIE v2 fake data", "tab:purple", 10)
plt.legend(loc="upper left")
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ (GeV/c$^2$)")
plt.xlim(0.7, 1.5)
plt.ylim(0.7, 1.5)
plt.show()

# Figure 2: extracted M_A without prior removal against the true M_A, with the
# one-sigma prior band shown for reference.
ma_axis_range = [0.7, 1.5]
prior_band_low = prior_MA - prior_MA_sigma
prior_band_high = prior_MA + prior_MA_sigma

plt.figure(figsize=(10, 7))
# Horizontal grey band: prior central value +/- its uncertainty.
plt.fill_between(
    ma_axis_range,
    [prior_band_low, prior_band_low],
    [prior_band_high, prior_band_high],
    color="gray",
    alpha=0.2,
    label="GENIE Prior",
)
# Dashed diagonal marks extracted == true.
plt.plot(ma_axis_range, ma_axis_range, c="k", ls="dashed", lw=2, zorder=5)

# One highlighted point per sample: (true, extracted, sigma, legend prefix, colour).
for sample_true_MA, sample_MA, sample_MA_sigma, sample_label, sample_color in (
    (asimov_true_MA, asimov_data_extracted_MA_no_prior_removal, asimov_data_extracted_MA_no_prior_removal_sigma, "Asimov fake data", "tab:red"),
    (nuwro_fake_data_true_MA, nuwro_fake_data_extracted_MA_no_prior_removal, nuwro_fake_data_extracted_MA_no_prior_removal_sigma, "NuWro fake data", "tab:brown"),
    (genie_v2_fake_data_true_MA, genie_v2_fake_data_extracted_MA_no_prior_removal, genie_v2_fake_data_extracted_MA_no_prior_removal_sigma, "GENIE v2 fake data", "tab:purple"),
):
    plot_bold_MA_value(sample_true_MA, sample_MA, sample_MA_sigma, sample_label, sample_color, 10)

plt.legend(loc="upper left")
plt.xlabel(r"True $M_A$ (GeV/c$^2$)")
plt.ylabel(r"Reco $M_A$ Posterior (GeV/c$^2$)")
plt.xlim(*ma_axis_range)
plt.ylim(*ma_axis_range)
plt.show()
