In [None]:
from typing import List
import uproot
import numpy as np
import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker
import os

# Apply the CMS plotting style from mplhep; it is set through both the
# matplotlib and the mplhep entry points.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Tick formatter that renders exponents with mathtext and uses (-3, 3) as its
# scientific-notation power limits.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Set the global font size after the style so the style does not override it.
plt.rcParams["font.size"] = 20

In [None]:
# Root of the repository, relative to the directory this notebook runs from.
MAIN_DIR = "../../../"

# Directory that every figure produced by this notebook is written to.
plot_dir = f"{MAIN_DIR}/plots/GoF/Apr27"
# Create it (and any missing parents) without spawning a shell; unlike
# `os.system("mkdir -p ...")`, a failure raises instead of being silently ignored.
os.makedirs(plot_dir, exist_ok=True)

In [None]:
def p_value(data_ts: float, toy_ts: List[float]):
    """Return the fraction of toys whose test statistic is at least the observed one.

    Args:
        data_ts: test statistic observed in data.
        toy_ts: test statistics of the toys (list, tuple or numpy array).

    Returns:
        ``mean(toy_ts >= data_ts)``; ``nan`` if ``toy_ts`` is empty.
    """
    # Coerce to an array so that plain Python lists (as the annotation allows)
    # support the element-wise comparison instead of raising TypeError.
    return np.mean(np.asarray(toy_ts) >= data_ts)


def F_statistic(
    ts_low: List[float], ts_high: List[float], ord_low: int, ord_high: int, num_bins: int = 10 * 14
):
    """Compute the F-test statistic comparing a lower-order and a higher-order fit.

    Evaluated element-wise as::

        F = [-2 ln(ts_low / ts_high) / (ord_high - ord_low)]
            / [-2 ln(ts_high) / (num_bins - ord_high)]

    Args:
        ts_low: test statistic(s) of the lower-order fit (scalar, list or array).
        ts_high: test statistic(s) of the higher-order fit, same shape as ``ts_low``.
        ord_low: order of the lower-order fit.
        ord_high: order of the higher-order fit.
        num_bins: number of bins entering the fit.

    Returns:
        The F statistic, with the same shape as the inputs.

    NOTE(review): the inputs are passed through ``log`` as if they were
    likelihood values; if they are already -2 ln(lambda) test statistics the
    logarithms may be unintended -- confirm against the F-test definition in use.
    """
    # Coerce so that plain Python lists (as annotated) support the arithmetic below.
    ts_low = np.asarray(ts_low)
    ts_high = np.asarray(ts_high)

    numerator = -2.0 * np.log(ts_low / ts_high) / (ord_high - ord_low)
    denominator = -2.0 * np.log(ts_high) / (num_bins - ord_high)

    return numerator / denominator

In [None]:
# Sub-directory of the cards area holding one folder of GoodnessOfFit outputs
# per (nTF1, nTF2) order pair.
cards_dir = "f_tests/Apr26/"
cards_base = f"/uscms/home/rkansal/nobackup/CMSSW_11_2_0/src/cards/{cards_dir}"

# All four dicts are indexed as [nTF1][nTF2].
toy_tests = {}  # toy test statistics
data_tests = {}  # data test statistics
f_toy_tests = {}  # F statistics of the toys, relative to the (0, 0) fit
f_data_tests = {}  # F statistic of the data, relative to the (0, 0) fit

for nTF1 in range(2):
    toy_tests[nTF1] = {}
    data_tests[nTF1] = {}
    f_toy_tests[nTF1] = {}
    f_data_tests[nTF1] = {}
    for nTF2 in range(2):
        order_dir = f"{cards_base}/nTF1_{nTF1}_nTF2_{nTF2}"

        # Toys may be split over several files (one per seed), hence the glob.
        file = uproot.concatenate(
            f"{order_dir}/higgsCombineValidationToys.GoodnessOfFit.mH125.*.root"
        )
        toy_tests[nTF1][nTF2] = np.array(file["limit"])  # toy test statistics

        file = uproot.concatenate(f"{order_dir}/higgsCombinepassData.GoodnessOfFit.mH125.root")
        data_tests[nTF1][nTF2] = file["limit"][0]  # data test statistic

        # The (0, 0) fit is the baseline, so it has no F statistic of its own.
        # It is loaded first (nTF1 = nTF2 = 0), so it is available for all others.
        if not (nTF1 == 0 and nTF2 == 0):
            # The (0, 0) fit is the lower-order model and therefore goes in the
            # `ts_low` / `ord_low` slots; the fit being tested is the higher-order one.
            f_toy_tests[nTF1][nTF2] = F_statistic(
                toy_tests[0][0], toy_tests[nTF1][nTF2], 0, nTF1 + nTF2
            )
            f_data_tests[nTF1][nTF2] = F_statistic(
                data_tests[0][0], data_tests[nTF1][nTF2], 0, nTF1 + nTF2
            )

In [None]:
# Observed and toy goodness-of-fit statistics of the (nTF1, nTF2) = (0, 0) fit.
data_ts = data_tests[0][0]
toy_ts = toy_tests[0][0]

# Histogram range wide enough to contain both the toys and the observed value.
plot_max = max(np.max(toy_ts), data_ts)
plot_min = min(np.min(toy_ts), data_ts)

# Fraction of toys with a test statistic at least as large as the data's.
gof_pval = p_value(data_ts, toy_ts)
gof_pval

In [None]:
# Toy test-statistic distribution for the (0, 0) fit with the observed value overlaid.
fig, ax = plt.subplots(figsize=(12, 8))

bin_edges = np.linspace(plot_min, plot_max, 10)  # 10 edges -> 9 equal-width bins
_ = ax.hist(toy_ts, bin_edges, color="dimgrey", histtype="step", label="Toys")
ax.axvline(data_ts, color="red", linestyle=":", label=rf"Data ($p$-value = {gof_pval:.2f})")
_ = ax.legend()

ax.set_title("Goodness of Fit")
ax.set_ylabel("Number of Toys")
ax.set_xlabel("Test Statistics")
fig.savefig(f"{plot_dir}/gof00.pdf")

In [None]:
# One F-test plot per first-order extension of the (0, 0) fit.
for ord1, ord2 in [[0, 1], [1, 0]]:
    f_toys = f_toy_tests[ord1][ord2]
    f_data = f_data_tests[ord1][ord2]

    pval = p_value(f_data, f_toys)
    # Histogram range wide enough to contain both the toys and the observed value.
    plot_min = min(np.min(f_toys), f_data)
    plot_max = max(np.max(f_toys), f_data)

    fig, ax = plt.subplots(figsize=(12, 8))
    bin_edges = np.linspace(plot_min, plot_max, 10)  # 10 edges -> 9 equal-width bins
    _ = ax.hist(f_toys, bin_edges, color="dimgrey", histtype="step", label="Toys")
    ax.axvline(f_data, color="red", linestyle=":", label=rf"Data ($p$-value = {pval:.2f})")
    _ = ax.legend()

    ax.set_title("F-test")
    ax.set_ylabel("Number of Toys")
    ax.set_xlabel("Test Statistics")
    fig.savefig(f"{plot_dir}/ftest{ord1}{ord2}.pdf")