# This notebook is a simple example of how we use the plotting tools in the freqfit repo, as well as how we access our data

In [None]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import glob
from freqfit.models.constants import QBB, NA, M76


import matplotlib

# Multiplicative factor applied to the signal strength when converting S to a half-life
scale_factor = 1

# Global matplotlib font settings and the colour palette shared by the plots below
font = dict(size=22)
matplotlib.rc("font", **font)
NICE_BLUE, NICE_RED = "#668DA5", "#B4584D"
NICE_GREEN, NICE_PINK = "#ABB1A2", "#CCACAD"

# Options for post-processing the toy test-statistics when they are loaded:
#   RANDOM  -> add Gaussian noise of width RV_SIZE to every toy test-statistic
#   TRUNC   -> round every toy test-statistic with np.around(..., 9), i.e. to 9 decimal places
# RANDOM takes precedence over TRUNC in the loading cells (if / elif).
RANDOM = False
RV_SIZE = 1e-6
TRUNC = False

In [None]:
# Directory that holds the toys generated with signal (note the trailing slash)
dir_name = "/data/eliza1/LEGEND/data/L200/limit/l200_toys_minimum_minimizer_truncated_gaussian_fixed_nuisance_0_lower_limit_10312024/"

# Path of the file containing the s-grid and the observed test-statistic values
f = f"{dir_name}l200_ts_wilks_fine.h5"

In [None]:
# Load the scanned signal grid and the observed test-statistic, then plot the profile.
# The context manager guarantees the HDF5 file is closed even if one of the
# dataset reads raises (the previous open/close pair leaked the handle in that case).
with h5py.File(f, "r") as df:
    s = df["s"][:]
    ts = df["t_s"][:]

# Alias of the scanned signal grid used by the later cells
s_scanned = s


plt.scatter(s, ts)
plt.title("L200 Test Statistic Profile")
plt.ylabel(r"$\tilde{t}_S$")
plt.xlabel(r"$S$")
# NOTE(review): 2.71 looks like the asymptotic (Wilks) 90% CL threshold for one
# degree of freedom -- confirm before relying on it.
plt.axhline(y=2.71)
plt.show()

In [None]:
# Toy test-statistics live in separate files named <S-value>_<jobid>.h5.
# Build one list of file paths per scanned S value, in the same order as s_scanned.

s_f_name_vals = []  # s-values from the file name keys

files_per_s = [glob.glob(dir_name + f"{s_val}_*.h5") for s_val in s_scanned]

In [None]:
# Read in the values of the test-statistics generated from the toys, and also determine the 90% critical value
# The array toys_per_scanned_s is a list of lists, where each list is all of the test-statistics at one value of S
from freqfit.statistics import ts_critical

# Critical value of the test-statistic (and its lower/upper band) per scanned S
t_crits = []
t_crit_lows = []
t_crit_his = []

# One list of toy test-statistics per scanned S (only for points that loaded successfully)
toys_per_scanned_s = []

for file_list in files_per_s:
    toy_ts_per_file = []
    try:
        for toy_path in file_list:
            # Read each file once; the context manager closes it even on error
            with h5py.File(toy_path, "r") as toy_file:
                toy_ts = toy_file["ts"][:]

            if RANDOM:
                # Smear the toys with small Gaussian noise of width RV_SIZE
                toy_ts = toy_ts + np.random.normal(0, RV_SIZE, len(toy_ts))
            elif TRUNC:
                # Round to 9 decimal places
                toy_ts = np.around(toy_ts, 9)

            toy_ts_per_file.extend(toy_ts)

        tcrit_tuple, _ = ts_critical(
            toy_ts_per_file, threshold=0.9, confidence=0.68, plot=False
        )
        t_crit, t_crit_low, t_crit_high = tcrit_tuple

        t_crits.append(t_crit)
        t_crit_lows.append(t_crit_low)
        t_crit_his.append(t_crit_high)

        toys_per_scanned_s.append(toy_ts_per_file)
    except Exception as exc:
        # Best effort: keep going so the remaining S-points are still processed, but
        # report the failure instead of hiding it. Catching Exception (rather than a
        # bare except) lets KeyboardInterrupt / SystemExit stop the cell as expected.
        print(f"Skipping S-point with {len(file_list)} file(s): {exc!r}")
        t_crits.append(np.nan)
        t_crit_lows.append(np.nan)
        t_crit_his.append(np.nan)
        # NOTE(review): toys_per_scanned_s gets no entry for a failed point, so it is
        # shorter than s_scanned / ts whenever a point fails -- confirm downstream
        # consumers tolerate that, or decide how failed points should be represented.

In [None]:
# Make an example plot of the observed test statistic and the critical value of the test statistic
# Nothing is decided by this plot, it is just nice to make to see how things look visually
from freqfit.statistics import find_crossing

# Observed test-statistic versus the toy-MC critical value, with the critical-value band
fig, ax = plt.subplots(figsize=(14, 10))
ax.scatter(s_scanned, ts, color=NICE_BLUE)
ax.plot(s_scanned, t_crits, c=NICE_RED, label=r"MC Critical $\tilde{t}_S$")
ax.fill_between(s_scanned, t_crit_lows, t_crit_his, alpha=0.6, color=NICE_RED)

# Crossing(s) between the observed profile and the critical curve; the last one is used
s_approx = find_crossing(s_scanned, t_crits, ts)
s_crossing = s_approx[-1]
ax.axvline(x=s_crossing, label="90% CL Crossing", ls="--", c=NICE_BLUE)

# Convert the crossing in S to the quantity shown in the title
T_est = 1 / (M76 * s_crossing * scale_factor / (np.log(2) * NA))

ax.set_xlabel("S")
ax.set_ylabel(r"$\tilde{t}_S$")
ax.set_title(f"LEGEND-200 Neutrino 2024 Limit\n{T_est: 0.2e}", usetex=True)

ax.legend()
plt.show()

In [None]:
# Create the p-values of the observed data for the Brazil plot.
# get_p_values is given the per-S lists of toy test-statistics and the observed
# test-statistics.
# NOTE(review): toys_per_scanned_s only receives an entry for S-points whose toys
# were loaded successfully, so it can be shorter than ts if any point failed --
# confirm the two sequences are aligned before trusting these p-values.
from freqfit.statistics import get_p_values

p_values = get_p_values(toys_per_scanned_s, ts)

In [None]:
# Observed p-value as a function of S, with the 90% CL threshold and its crossing
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(s_scanned, p_values, color=NICE_BLUE)
ax.set_xlabel("S")
ax.set_ylabel(r"p-value")

# Where the observed p-value crosses 0.1; the last crossing is taken as the limit
s_approx = find_crossing(s_scanned, p_values, 0.1)
s_crossing = s_approx[-1]

ax.axhline(y=0.1, label="90% CL", c=NICE_GREEN)
ax.axvline(x=s_crossing, label="90% CL Crossing", ls="--", c=NICE_BLUE)

# T_est is reused by the Brazil plot legend further down
T_est = 1 / (M76 * s_crossing * scale_factor / (np.log(2) * NA))
ax.set_title(f"LEGEND-200 Neutrino 2024 Dataset Limit\n{T_est: 0.2e}", usetex=True)
ax.set_yscale("log")
ax.legend()
plt.show()

In [None]:
# Directory containing the zero-signal toys, and every HDF5 file found inside it

brazil_dir_path = "/data/eliza1/LEGEND/data/L200/limit/l200_brazil_minimum_minimizer_truncated_gaussian_fixed_nuisance_0_lower_limit_10312024"
brazil_files = glob.glob(f"{brazil_dir_path}/*.h5")

In [None]:
# The s-values scanned for the 0-signal toys are saved inside the files; read them
# from the first file. The context manager closes the file even if the read fails.
with h5py.File(brazil_files[0], "r") as f:
    s_values_zero_sig = f["s"][:]

In [None]:
# Read in the test-statistics of the zero-signal toys.
# toys_per_s_zero_sig is a list of lists: entry i holds the test-statistics of every
# 0-signal toy tested against the i-th S-value. Es_per_s_zero_sig and
# seeds_per_s_zero_sig hold, for every S index, the "Es" and "seed" entries of all files.
toys_per_s_zero_sig = []
Es_per_s_zero_sig = []
seeds_per_s_zero_sig = []

# Read every file exactly once (instead of re-opening all files for every S index)
# and close it right away. Loop-local names are used so that the observed
# test-statistic stored in the global `ts` is not overwritten by this cell.
brazil_contents = []
for brazil_path in brazil_files:
    with h5py.File(brazil_path, "r") as brazil_file:
        brazil_contents.append(
            (brazil_file["ts"][:], brazil_file["Es"][:], brazil_file["seed"][:])
        )

for i in range(len(s_values_zero_sig)):
    toys_per_file = []
    Es_per_file = []
    seeds_per_file = []

    for file_ts, file_Es, file_seeds in brazil_contents:
        # Row i of the file's "ts" dataset: the toys evaluated at the i-th S-value
        toys_at_s = file_ts[i]
        if RANDOM:
            # Smear the toys with small Gaussian noise of width RV_SIZE
            toys_at_s = toys_at_s + np.random.normal(0, RV_SIZE, len(toys_at_s))
        elif TRUNC:
            # Round to 9 decimal places
            toys_at_s = np.around(toys_at_s, 9)

        toys_per_file.extend(toys_at_s)
        Es_per_file.extend(file_Es)
        seeds_per_file.extend(file_seeds)

    toys_per_s_zero_sig.append(toys_per_file)
    Es_per_s_zero_sig.append(Es_per_file)
    seeds_per_s_zero_sig.append(seeds_per_file)

In [None]:
# get the median p-values
from math import erf
from freqfit.statistics import sensitivity

# erf(k / sqrt(2)) is the two-sided Gaussian coverage of a +/- k sigma interval
TWO_SIGMA_CL = erf(2 / np.sqrt(2))
ONE_SIGMA_CL = erf(1 / np.sqrt(2))

# Keyword arguments common to both sensitivity() calls
shared_sensitivity_kwargs = {"plot": False, "step": 0.001}

# 2-sigma band (the three returned arrays are named after how they are plotted later)
p_values_median, p_values_hi, p_values_lo = sensitivity(
    toys_per_s_zero_sig[:],
    toys_per_scanned_s[:],
    s_scanned[:],
    CL=TWO_SIGMA_CL,
    plot_dir="/home/sjborden/freqfit/l200_plots/",
    save=False,
    **shared_sensitivity_kwargs,
)

# 1-sigma band; p_values_median is overwritten by this second call
p_values_median, p_values_hi_1, p_values_lo_1 = sensitivity(
    toys_per_s_zero_sig[:],
    toys_per_scanned_s[:],
    s_scanned[:],
    CL=ONE_SIGMA_CL,
    **shared_sensitivity_kwargs,
)

In [None]:
# Make the Brazil plot

# NOTE(review): `ma` is not referenced by any visible cell; kept only so that nothing
# relying on the name breaks -- remove if it is truly unused.
ma = 0.0759214027

# NA is the constant imported from freqfit.models.constants at the top of the notebook.
# It is intentionally NOT redefined here: overriding it made the median limit below use
# a different constant than the observed limit (T_est) printed in the same legend, and
# silently changed the result of earlier cells when they were re-run.

# Convert the signal grids to the x-axis quantity: first 1 / (M76 * s / (ln2 * NA)),
# then its inverse expressed in units of 1e-25 (see the x-axis label below).
T_12 = 1 / (M76 * s / (np.log(2) * NA))
T_12 = 1 / T_12 / 1e-25

T_12_exp = 1 / (M76 * s_values_zero_sig / (np.log(2) * NA))
T_12_exp = 1 / T_12_exp / 1e-25

BRAZIL_YELLOW = "#FFFF00"
BRAZIL_GREEN = "#00FF00"

plt.figure(figsize=(12, 8))
plt.plot(T_12, p_values, color="k", label="Observed")
plt.plot(T_12_exp, p_values_median, color="k", ls="--", label="Median, No Signal")
plt.fill_between(
    T_12_exp,
    p_values_lo,
    p_values_hi,
    color=BRAZIL_YELLOW,
    alpha=0.6,
    label=r"2 $\sigma$ Interval",
)
plt.fill_between(
    T_12_exp,
    p_values_lo_1,
    p_values_hi_1,
    color=BRAZIL_GREEN,
    alpha=0.6,
    label=r"1 $\sigma$ Interval",
)


# NOTE(review): the median curve is plotted against the zero-signal grid
# (s_values_zero_sig) but the crossing is searched on `s` -- confirm both grids match.
s_approx = find_crossing(s, p_values_median, 0.1)
T_12_median = 1 / (M76 * s_approx[-1] / (np.log(2) * NA))

plt.yscale("log")
plt.xlabel(r"$\Gamma_{1/2}^{0\nu} \, [10^{-25} \,\mathrm{yr}^{-1}]$ ")
plt.ylabel("p-value")
plt.ylim([1e-4, 1])
# Empty artists: used only to add the two numbers to the legend
plt.plot([], [], ls="none", label=f"Median {T_12_median: 0.2e}")
plt.plot([], [], ls="none", label=f"Observed Limit {T_est: 0.2e}")
plt.legend()
plt.title(
    "Minimum Minimizer + Truncated Gaussian"
    + "\n Fixed Nuisance Parameters + Poisson Initial Guess \n No RV Added \n Lower Limits at 0 \nLEGEND-200 Neutrino 2024 Dataset",
    usetex=True,
)

plt.show()

# Here is how to get the median exclusion sensitivity

In [None]:
import numba as nb
import freqfit.models.constants as constants

# Sort the non-zero-signal toys of every scanned S once, up front, before computing p-values
sorted_toys_per_scanned_s = [np.sort(toy_ts) for toy_ts in toys_per_scanned_s]


@nb.jit(nopython=True, parallel=True, fastmath=True)
def brazil_data_new(sorted_toy_ts: np.array, ts_observed: np.array):
    """
    Compute one p-value per scanned point from toy test statistics.

    Parameters
    ----------
    sorted_toy_ts
        Sequence of arrays. Entry i holds the toy test statistics generated at scanned point i
    ts_observed
        Sequence of test statistics to evaluate, one per scanned point

    Returns
    -------
    p_values
        Array where entry i is the fraction of toys at point i that are greater than
        or equal to ts_observed[i]

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of scanned points
    """

    n_points = len(ts_observed)
    if len(sorted_toy_ts) != n_points:
        raise ValueError(
            "The number of scanned points for the toys is not equal to the number of observed test statistics"
        )

    p_values = np.zeros(n_points)
    for idx in nb.prange(n_points):
        toys_at_point = sorted_toy_ts[idx]
        # Toys at least as extreme as the value being tested
        exceeding = toys_at_point[toys_at_point >= ts_observed[idx]]
        p_values[idx] = len(exceeding) / len(toys_at_point)

    return p_values


from tqdm import tqdm

# Number of zero-signal toys (every S-point holds the same toys, so use the first)
num_toys = len(toys_per_s_zero_sig[0])
# 90% CL result of every zero-signal toy, filled in the loop below
T_12_90CL = []

# NOTE(review): `plot` and `WINDOW` are not referenced by any visible cell.
plot = False

WINDOW = np.array(constants.WINDOW)

# Loop-local names (toy_*) are used on purpose: this loop used to overwrite the
# globals p_values, s_approx and T_est, which hold the OBSERVED results, so re-running
# the Brazil plot cell afterwards displayed a toy value as the observed limit.
for i in tqdm(range(num_toys)):
    # Test statistic of toy i at every scanned S-value
    toy_ts_vs_s = [toys[i] for toys in toys_per_s_zero_sig]

    toy_p_values = brazil_data_new(sorted_toys_per_scanned_s, toy_ts_vs_s)

    toy_crossings = find_crossing(s_values_zero_sig, toy_p_values, 0.1)

    if toy_p_values[-1] > 0.1:
        # Still above 0.1 at the end of the grid: fall back to the last S-value
        toy_s_limit = s_values_zero_sig[-1]
    elif len(toy_crossings) < 1:
        # No crossing found: fall back to the first S-value
        toy_s_limit = s_values_zero_sig[0]
    else:
        toy_s_limit = toy_crossings[-1]

    T_12_90CL.append(1 / (M76 * toy_s_limit / (np.log(2) * NA)))

In [None]:
# Distribution of the 90% CL results from the zero-signal toys and its median
T_12_90CL = np.array(T_12_90CL)
median_T_12 = np.nanmedian(T_12_90CL)
print(median_T_12)

# The histogram is drawn for the inverse of the per-toy values
inverse_T_12 = 1 / T_12_90CL

fig, ax = plt.subplots(figsize=(12, 8))
ax.hist(inverse_T_12, bins=1000)
ax.axvline(
    x=np.nanmedian(inverse_T_12),
    ls="--",
    label=f"Median 90% CL Half-Life: {median_T_12:.2e}",
)
ax.set_yscale("log")
ax.legend()
ax.set_xlabel("90% CL Half-Rate From 0 Signal Toys [yr^-1]")
ax.set_ylabel("Counts")
ax.set_title(
    "Minimum Minimizer + Truncated Gaussian"
    "\n Fixed Nuisance Parameters + Poisson Initial Guess \n No Sigma Gaussian RV\n  0 Lower Limit on S and BI \n LEGEND-200 Neutrino 2024 Dataset",
    usetex=True,
)
ax.set_xlim([0, None])

plt.show()