In [None]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib as mpl
mpl.rcParams['hatch.linewidth'] = 0.2
import numpy as np
import pandas as pd
import pickle
from tqdm.notebook import tqdm
import polars as pl

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.signal_categories import topological_category_labels, topological_category_colors, topological_category_labels_latex, topological_category_hatches, topological_categories_dic
from src.signal_categories import filetype_category_labels, filetype_category_colors, filetype_category_hatches
from src.signal_categories import del1g_detailed_category_labels, del1g_detailed_category_colors, del1g_detailed_category_labels_latex, del1g_detailed_category_hatches, del1g_detailed_categories_dic
from src.signal_categories import del1g_simple_category_labels, del1g_simple_category_colors, del1g_simple_category_labels_latex, del1g_simple_category_hatches, del1g_simple_categories_dic
from src.signal_categories import train_category_labels, train_category_labels_latex

from src.ntuple_variables.pandora_variables import pandora_scalar_second_half_training_vars

from src.file_locations import intermediate_files_location

from src.plot_helpers import make_plot

from src.ntuple_variables.variables import combined_training_vars

from src.systematics import get_significance_from_p_value

plt.rcParams.update({'font.size': 12})


# Test DetVar Plot

In [None]:
# Load the DetVar dataframe (with training variables) from the intermediate-files area.
detvar_parquet_name = "detvar_presel_df_train_vars.parquet"
# Report the file that is actually read; the previous message named a different file.
print(f"loading {detvar_parquet_name}...")
presel_detvar_df = pl.read_parquet(f"{intermediate_files_location}/{detvar_parquet_name}")
print(f"{presel_detvar_df.shape=}")


In [None]:
# Display the identifying columns and weights for every row of run 24651, subrun 85.
presel_detvar_df.filter(
    (pl.col("run") == 24651) & (pl.col("subrun") == 85)
)[["filetype", "vartype", "run", "subrun", "event", "wc_kine_reco_Enu", "wc_net_weight", "wc_weight_cv"]]

In [None]:
# Keep only the rows whose vartype is "CV" (presumably the central-value sample — confirm).
is_cv_row = pl.col("vartype") == "CV"
cv_presel_detvar_df = presel_detvar_df.filter(is_cv_row)

In [None]:
# Total wc_net_weight over the CV rows (displayed as the cell output).
cv_net_weights = cv_presel_detvar_df[["wc_net_weight"]].to_numpy()
np.sum(cv_net_weights)

In [None]:
# Quick cross-check: wc_kine_reco_Enu histogram of the CV rows, weighted by wc_net_weight.
cv_reco_enu = cv_presel_detvar_df[["wc_kine_reco_Enu"]].to_numpy()
cv_event_weights = cv_presel_detvar_df[["wc_net_weight"]].to_numpy()
reco_enu_bin_edges = np.linspace(0, 2000, 21)  # 20 equal-width bins from 0 to 2000

plt.figure()
plt.hist(cv_reco_enu, weights=cv_event_weights, bins=reco_enu_bin_edges)
plt.show()


In [None]:
# DetVar breakdown of wc_kine_reco_Enu: prediction only (no data), ratio enabled,
# overflow included, using 20 equal-width bins from 0 to 2000.
detvar_enu_bins = np.linspace(0, 2000, 21)
make_plot(
    pred_sel_df=presel_detvar_df,
    include_data=False,
    breakdown_type="DetVar",
    include_ratio=True,
    normalizing_POT=1.11e21,
    var='wc_kine_reco_Enu',
    include_legend=True,
    bins=detvar_enu_bins,
    include_overflow=True,
    include_underflow=False,
    log_x=False,
    savename="wc_generic_sel",
    title="v10_04_07_15 BNB nu_overlay DetVar tests",
)

In [None]:
# Show the rows where glee_min_isolation_min_dist_trk_shr holds a real value.
# The previous `col == col` test is the pandas/NumPy "not NaN" idiom, but Polars
# treats NaN as equal to NaN, so that comparison only dropped nulls. Both kinds of
# missing value are now excluded explicitly.
# NOTE(review): is_not_nan() expects a floating-point column — confirm the dtype.
min_dist_trk_shr = pl.col('glee_min_isolation_min_dist_trk_shr')
presel_detvar_df[["filetype", "vartype", "run", "subrun", "event", "glee_min_isolation_min_dist_trk_shr"]].filter(
    min_dist_trk_shr.is_not_null() & min_dist_trk_shr.is_not_nan())

In [None]:
# DetVar breakdown of glee_min_isolation_min_dist_trk_shr: prediction only (no data),
# ratio enabled, log y-axis, overflow included, 20 equal-width bins from -1000 to 500.
min_dist_bins = np.linspace(-1000, 500, 21)
make_plot(
    pred_sel_df=presel_detvar_df,
    include_data=False,
    breakdown_type="DetVar",
    include_ratio=True,
    log_y=True,
    var='glee_min_isolation_min_dist_trk_shr',
    include_legend=True,
    bins=min_dist_bins,
    include_overflow=True,
    include_underflow=False,
    log_x=False,
    savename="wc_generic_sel",
    title="v10_04_07_15 BNB nu_overlay DetVar tests",
)

In [None]:
# Write one DetVar plot per training variable into a single multi-page PDF.
plot_vars = sorted(combined_training_vars)

# Options shared by every page; only the variable and page number change per call.
detvar_page_options = dict(
    pred_sel_df=presel_detvar_df,
    include_data=False,
    breakdown_type="DetVar",
    include_ratio=True,
    log_y=True,
    include_legend=True,
    include_overflow=False,
    include_underflow=False,
    log_x=False,
    savename="wc_generic_sel",
    title="v10_04_07_15 BNB nu_overlay DetVar tests",
    show=False,
)

with PdfPages("../plots/detvar_all_bdt_vars.pdf") as pdf:
    for page_index, var in tqdm(enumerate(plot_vars), total=len(plot_vars)):
        print("plotting", var)

        # Pages are numbered from 1.
        make_plot(var=var, page_num=page_index + 1, **detvar_page_options)

        # Store the current figure as the next PDF page, then close it.
        pdf.savefig()
        plt.close()


In [None]:
# Evaluating 1/0 raises ZeroDivisionError before print() is ever called, so this
# cell always fails. NOTE(review): presumably a deliberate barrier that stops
# "Run All" before the sections below — confirm, and consider an explicit
# `raise` with a message if so.
print(1/0)

# Test Plot With RW Systematics

In [None]:
# Aliases for the training category labels (plain and LaTeX versions).
reco_categories = train_category_labels
reco_category_labels_latex = train_category_labels_latex


# Load the dataframe with training variables. The message reports the file that is
# actually read; the previous message named a non-existent "all_df.parquet".
presel_parquet_name = "presel_df_train_vars.parquet"
print(f"loading {presel_parquet_name}...")
all_df = pl.read_parquet(f"{intermediate_files_location}/{presel_parquet_name}")
print(f"{all_df.shape=}")

# Load the companion weights dataframe that is later passed to make_plot as weights_df.
print("loading presel_weights_df.parquet...")
presel_weights_df = pl.read_parquet(f"{intermediate_files_location}/presel_weights_df.parquet")
print(f"{presel_weights_df.shape=}")

# Prediction: every filetype except data and the two one-gamma overlay samples.
pred_df = all_df.filter(
    ~pl.col("filetype").is_in(["data", "isotropic_one_gamma_overlay", "delete_one_gamma_overlay"])
)
# Data: rows whose filetype is exactly "data".
data_df = all_df.filter(
    pl.col("filetype") == "data"
)


In [None]:
# Data vs. prediction for wc_kine_reco_Enu with reweighting systematics and the
# systematic breakdown enabled, bypassing the systematics cache.
rw_enu_bins = np.linspace(0, 2000, 21)  # 20 equal-width bins from 0 to 2000
make_plot(
    pred_sel_df=pred_df,
    data_sel_df=data_df,
    weights_df=presel_weights_df,
    var='wc_kine_reco_Enu',
    bins=rw_enu_bins,
    dont_load_from_systematic_cache=True,
    plot_rw_systematics=True,
    include_systematic_breakdown=True,
    include_legend=True,
    include_overflow=True,
    include_underflow=False,
    log_x=False,
    savename="wc_generic_sel",
)

In [None]:
# Legend-only figure: the bin edges are pushed to extreme values and overflow and
# underflow are disabled, so (per the original note) the legend is drawn with
# nothing behind it.
legend_only_bins = np.array([-1e9, 1e9, 1e12])
make_plot(
    pred_sel_df=pred_df,
    data_sel_df=data_df,
    weights_df=presel_weights_df,
    var='wc_kine_reco_Enu',
    bins=legend_only_bins,
    dont_load_from_systematic_cache=True,
    plot_rw_systematics=True,
    include_systematic_breakdown=True,
    include_legend=True,
    include_overflow=False,
    include_underflow=False,
    log_x=False,
    savename="only_legend",
)

# Creating All RW Systematics Plots

In [None]:
# Collects one (chi2, ndf, p_value, sigma, inverse_success) tuple per plotted variable.
all_p_value_info = []

# Currently restricted to a single variable; the full list is kept for reference.
#plot_vars = sorted(combined_training_vars)
plot_vars = sorted(["pandora_tksh_distance"])

# Options shared by every page; only the variable and page number change per call.
rw_syst_page_options = dict(
    pred_sel_df=pred_df,
    data_sel_df=data_df,
    weights_df=presel_weights_df,
    dont_load_from_systematic_cache=True,
    plot_rw_systematics=True,
    include_systematic_breakdown=False,
    include_overflow=False,
    include_underflow=False,
    include_legend=False,
    log_y=True,
    show=False,
    return_p_value_info=True,
)

with PdfPages("../plots/all_bdt_vars_open_data.pdf") as pdf:
    for page_index, var in tqdm(enumerate(plot_vars), total=len(plot_vars)):
        print("plotting", var)
        # With return_p_value_info=True make_plot returns the five fit-quality values.
        chi2, ndf, p_value, sigma, inverse_success = make_plot(
            var=var, page_num=page_index + 1, **rw_syst_page_options
        )
        all_p_value_info.append((chi2, ndf, p_value, sigma, inverse_success))

        # Store the current figure as the next PDF page, then close it.
        pdf.savefig()
        plt.close()


In [None]:
# Saving is currently disabled; uncomment to write all_p_value_info to disk.
#with open("all_p_value_info.pkl", "wb") as f:
#    pickle.dump(all_p_value_info, f)

# Replace all_p_value_info with the contents of the pickle file.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open("all_p_value_info.pkl", "rb") as pickle_file:
    all_p_value_info = pickle.load(pickle_file)


In [None]:
# Pull the p-value and sigma out of each (chi2, ndf, p_value, sigma, inverse_success) tuple.
all_p_values = [p_value for chi2, ndf, p_value, sigma, inverse_success in all_p_value_info]
all_sigmas = [sigma for chi2, ndf, p_value, sigma, inverse_success in all_p_value_info]

# p-value distribution in 100 equal-width bins on [0, 1].
bins = np.linspace(0, 1, 101)
plt.figure(figsize=(10, 6))
plt.hist(all_p_values, bins=bins, histtype="step")
# Per-bin count expected if the p-values were uniformly distributed.
plt.axhline(y=len(all_p_values)/(len(bins)-1), color='red', linestyle='--', label="Expected")
plt.xlabel("p-value")
plt.ylabel("relative frequency")
plt.title("All BDT variables after WC generic selection")
# Without a legend the "Expected" label on the reference line is never displayed.
plt.legend()
plt.show()

# Sigma distribution: the upper bin edge is the largest non-infinite sigma.
sigma_values = np.array(all_sigmas)
finite_sigmas = sigma_values[sigma_values < np.inf]
# np.max raises ValueError on an empty selection (no entries, or every sigma infinite),
# so fall back to a unit range in that case.
sigma_upper_edge = np.max(finite_sigmas) if finite_sigmas.size > 0 else 1.0
bins = np.linspace(0, sigma_upper_edge, 101)
plt.figure(figsize=(10, 6))
plt.hist(all_sigmas, bins=bins, histtype="step")
plt.xlabel(r"$\sigma$")
plt.ylabel("relative frequency")
plt.title("All BDT variables after WC generic selection")
plt.show()


In [None]:
# Report every variable whose data/prediction disagreement exceeds 3 sigma, and every
# variable for which inverse_success is false.
# Sort once instead of re-sorting plot_vars for every printed line.
sorted_plot_vars = sorted(plot_vars)

# Entry i of all_p_value_info only corresponds to sorted_plot_vars[i] when both were
# produced from the same variable list. If the results came from a pickle made with a
# different plot_vars, indexing would raise IndexError or print the wrong name.
names_are_aligned = len(sorted_plot_vars) == len(all_p_value_info)
if not names_are_aligned:
    print(f"WARNING: {len(all_p_value_info)} p-value entries but {len(sorted_plot_vars)} "
          "plot variables; variable names cannot be matched, reporting entry indices instead")

for i, (chi2, ndf, p_value, sigma, inverse_success) in enumerate(all_p_value_info):
    var_label = sorted_plot_vars[i] if names_are_aligned else f"<entry {i}>"
    if sigma > 3:
        print(f"sigma = {sigma} for variable {var_label}")
    if not inverse_success:
        print(f"failed to invert for variable {var_label}")
