In [None]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib as mpl
mpl.rcParams['hatch.linewidth'] = 0.2
import numpy as np
import pandas as pd
import pickle
from tqdm.notebook import tqdm
import polars as pl

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.signal_categories import topological_category_labels, topological_category_colors, topological_category_labels_latex, topological_category_hatches, topological_categories_dic
from src.signal_categories import filetype_category_labels, filetype_category_colors, filetype_category_hatches
from src.signal_categories import del1g_detailed_category_labels, del1g_detailed_category_colors, del1g_detailed_category_labels_latex, del1g_detailed_category_hatches, del1g_detailed_categories_dic
from src.signal_categories import del1g_simple_category_labels, del1g_simple_category_colors, del1g_simple_category_labels_latex, del1g_simple_category_hatches, del1g_simple_categories_dic
from src.signal_categories import train_category_labels, train_category_labels_latex

from src.ntuple_variables.pandora_variables import pandora_scalar_second_half_training_vars

from src.file_locations import intermediate_files_location

from src.plot_helpers import make_histogram_plot

from src.ntuple_variables.variables import combined_training_vars

from src.systematics import get_significance_from_p_value

plt.rcParams.update({'font.size': 12})

from src.file_locations import intermediate_files_location


In [None]:
# Read the list of per-variable p-value dictionaries from disk
# (presumably written by an earlier run of the plotting cells — TODO confirm).
# NOTE: unpickling can execute arbitrary code; only load files produced by this analysis.
with open("../plots/all_p_value_info.pkl", "rb") as pickle_file:
    all_p_value_info = pickle.load(pickle_file)


In [None]:
# Show which keys the first entry carries, one per line.
first_entry = all_p_value_info[0]
for field_name in first_entry:
    print(field_name)


In [None]:
# Print every variable whose "tot_sigma" entry is above 3.
for info in all_p_value_info:
    name, significance = info["var"], info["tot_sigma"]
    if significance > 3:
        print(name, significance)


In [None]:
# Pull each quantity out of the per-variable dictionaries into its own list,
# preserving the order of all_p_value_info.
nodetvar_p_values = [info["nodetvar_p_value"] for info in all_p_value_info]
nodetvar_sigmas = [info["nodetvar_sigma"] for info in all_p_value_info]
tot_p_values = [info["tot_p_value"] for info in all_p_value_info]
tot_sigmas = [info["tot_sigma"] for info in all_p_value_info]
nodetvar_global_sigmas = [info["nodetvar_global_sigma"] for info in all_p_value_info]
nodetvar_global_p_values = [info["nodetvar_global_p_value"] for info in all_p_value_info]
tot_global_sigmas = [info["tot_global_sigma"] for info in all_p_value_info]
tot_global_p_values = [info["tot_global_p_value"] for info in all_p_value_info]


In [None]:
# --- p-value distributions -------------------------------------------------
bins = np.linspace(0, 1, 51)
plt.figure(figsize=(10, 6))
for values, label in [
    (nodetvar_p_values, "No DetVar"),
    (tot_p_values, "With DetVar"),
    (nodetvar_global_p_values, "No DetVar Decomp"),
    (tot_global_p_values, "With DetVar Decomp"),
]:
    plt.hist(values, bins=bins, histtype="step", label=label)
# Reference level: the count each bin would hold if the entries were spread
# evenly over the bins (number of entries / number of bins).
plt.axhline(y=len(nodetvar_p_values)/(len(bins)-1), color='red', linestyle='--', label="Expected")
plt.xlabel("p-value")
# The histograms are not normalised (no density/weights), so the y axis is a
# raw count per bin rather than a relative frequency.
plt.ylabel("number of variables")
plt.yscale("log")
plt.title("All BDT variables after WC generic selection")
plt.legend()
plt.show()

# --- significance distributions --------------------------------------------
all_sigmas = tot_sigmas + nodetvar_sigmas + tot_global_sigmas + nodetvar_global_sigmas

# Infinite (or NaN) significances cannot define a bin edge; use the largest
# finite value, and fall back to 1.0 when there is none so linspace still works.
sigma_array = np.asarray(all_sigmas, dtype=float)
finite_sigmas = sigma_array[np.isfinite(sigma_array)]
sigma_upper_edge = finite_sigmas.max() if finite_sigmas.size else 1.0

bins = np.linspace(0, sigma_upper_edge, 51)
plt.figure(figsize=(10, 6))
for values, label in [
    (nodetvar_sigmas, "No DetVar"),
    (tot_sigmas, "With DetVar"),
    (nodetvar_global_sigmas, "No DetVar Decomp"),
    (tot_global_sigmas, "With DetVar Decomp"),
]:
    plt.hist(values, bins=bins, histtype="step", label=label)
plt.xlabel(r"$\sigma$")
plt.ylabel("number of variables")
plt.yscale("log")
plt.title("All BDT variables after WC generic selection")
plt.legend()
plt.show()


In [None]:
# Deliberate barrier for "Run All" (the original cell forced a ZeroDivisionError
# for the same effect): the cells above only inspect saved results, while the
# cells below reload the dataframes, rebuild the plots and overwrite
# all_p_value_info. Raise an explicit error so the reason is visible.
raise RuntimeError(
    "Intentional stop: the cells below regenerate the plots and all_p_value_info; run them manually."
)

In [None]:
# Dataframe passed as `detvar_df` to make_histogram_plot in the cells below.
# The progress message names the file that is actually read.
print("loading detvar_presel_df_train_vars.parquet...")
presel_detvar_df = pl.read_parquet(f"{intermediate_files_location}/detvar_presel_df_train_vars.parquet")
print(f"{presel_detvar_df.shape=}")


# Test Plot With RW and DetVar Systematics

In [None]:
reco_categories = train_category_labels
reco_category_labels_latex = train_category_labels_latex


# The progress message names the file that is actually read into all_df.
print("loading presel_df_train_vars.parquet...")
all_df = pl.read_parquet(f"{intermediate_files_location}/presel_df_train_vars.parquet")
print(f"{all_df.shape=}")

print("loading presel_weights_df.parquet...")
presel_weights_df = pl.read_parquet(f"{intermediate_files_location}/presel_weights_df.parquet")
print(f"{presel_weights_df.shape=}")

# Prediction sample: every row whose filetype is neither data nor one of the
# two single-gamma overlay filetypes listed here.
pred_df = all_df.filter(
    ~pl.col("filetype").is_in(["data", "isotropic_one_gamma_overlay", "delete_one_gamma_overlay"])
)
# Data sample: rows whose filetype is exactly "data".
data_df = all_df.filter(
    pl.col("filetype") == "data"
)


In [None]:
# Single test plot of wc_kine_reco_Enu in 20 equal-width bins over [0, 2000],
# with both systematics switches on and both systematic caches bypassed.
make_histogram_plot(
    pred_sel_df=pred_df,
    data_sel_df=data_df,
    bins=np.linspace(0, 2000, 21),
    include_overflow=True,
    include_underflow=False,
    log_x=False,
    log_y=False,
    include_legend=False,
    var="wc_kine_reco_Enu",
    title="Preselection",
    selname="wc_generic_sel",
    savename="wc_generic_sel",
    include_ratio=True,
    include_decomposition=True,
    use_rw_systematics=True,
    use_detvar_systematics=True,
    detvar_df=presel_detvar_df,
    dont_load_rw_from_systematic_cache=True,
    dont_load_detvar_from_systematic_cache=True,
)


# Creating All RW and DetVar Systematics Plots

In [None]:
# Start from an empty list; one p-value dictionary is appended per plotted variable.
all_p_value_info = []

# NOTE(review): the [:3] slice restricts the run to the first three variables in
# sorted order, although the output file is named "all_bdt_vars" — confirm
# whether this truncation is a leftover from a quick test.
plot_vars = sorted(combined_training_vars)[:3]

with PdfPages("../plots/all_bdt_vars_open_data.pdf") as pdf:
    # Pages are numbered from 1, in the same order as plot_vars.
    for page_number, var in tqdm(enumerate(plot_vars, start=1), total=len(plot_vars)):
        print("plotting", var)

        p_value_info_dic = make_histogram_plot(
            pred_sel_df=pred_df,
            data_sel_df=data_df,
            include_overflow=False,
            include_underflow=False,
            log_y=True,
            include_legend=False,
            var=var,
            title="Preselection",
            selname="wc_generic_sel",
            savename="only_legend",
            include_ratio=True,
            include_decomposition=True,
            use_rw_systematics=True,
            use_detvar_systematics=True,
            detvar_df=presel_detvar_df,
            page_num=page_number,
            weights_df=presel_weights_df,
        )

        all_p_value_info.append(p_value_info_dic)
        # Write the current figure as the next PDF page, then release it.
        pdf.savefig()
        plt.close()


In [None]:
# Persist the list of p-value dictionaries collected by the plotting loop.
# NOTE(review): this writes to the current working directory, whereas the load
# cell near the top of the notebook reads "../plots/all_p_value_info.pkl" —
# confirm which location is intended.
with open("all_p_value_info.pkl", "wb") as pickle_file:
    pickle.dump(all_p_value_info, pickle_file)

# To restore a previously saved list instead of recomputing it:
#with open("all_p_value_info.pkl", "rb") as f:
#    all_p_value_info = pickle.load(f)


In [None]:
all_p_values = [p_value_info_dic["nodetvar_p_value"] for p_value_info_dic in all_p_value_info]
all_sigmas = [p_value_info_dic["nodetvar_sigma"] for p_value_info_dic in all_p_value_info]

# --- p-value distribution ---------------------------------------------------
bins = np.linspace(0, 1, 101)
plt.figure(figsize=(10, 6))
plt.hist(all_p_values, bins=bins, histtype="step")
# Reference level: the count each bin would hold if the entries were spread
# evenly over the bins (number of entries / number of bins).
plt.axhline(y=len(all_p_values)/(len(bins)-1), color='red', linestyle='--', label="Expected")
plt.xlabel("p-value")
# The histogram is not normalised, so the y axis is a raw count per bin.
plt.ylabel("number of variables")
plt.title("All BDT variables after WC generic selection")
# Without a legend call the "Expected" label on the reference line is never shown.
plt.legend()
plt.show()

# --- significance distribution ----------------------------------------------
# Infinite (or NaN) significances cannot define a bin edge; use the largest
# finite value, and fall back to 1.0 when there is none so linspace still works.
sigma_array = np.asarray(all_sigmas, dtype=float)
finite_sigmas = sigma_array[np.isfinite(sigma_array)]
sigma_upper_edge = finite_sigmas.max() if finite_sigmas.size else 1.0

bins = np.linspace(0, sigma_upper_edge, 101)
plt.figure(figsize=(10, 6))
plt.hist(all_sigmas, bins=bins, histtype="step")
plt.xlabel(r"$\sigma$")
plt.ylabel("number of variables")
plt.title("All BDT variables after WC generic selection")
plt.show()


In [None]:
# Each entry of all_p_value_info is a dictionary (the other cells index it by
# string key), so unpacking it into five names would iterate over its keys and
# either fail or bind key strings. Read the values by key instead.
for i, info in enumerate(all_p_value_info):
    # Prefer the variable name stored with the entry; otherwise fall back to the
    # position in plot_vars, which is already sorted when it is built.
    var_name = info["var"] if "var" in info else plot_vars[i]

    # NOTE(review): uses "tot_sigma", matching the earlier >3 sigma scan in this
    # notebook; switch to "nodetvar_sigma" if that was the intended quantity.
    sigma = info["tot_sigma"]
    if sigma > 3:
        print(f"sigma = {sigma} for variable {var_name}")

    # NOTE(review): no other cell reads an "inverse_success" key, so it is
    # treated as optional here and a missing key counts as success — confirm
    # against what make_histogram_plot returns.
    if not info.get("inverse_success", True):
        print(f"failed to invert for variable {var_name}")
