In [None]:
from typing import List
import uproot
import numpy as np
import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker
import os

# Apply the CMS style sheet, once through matplotlib and once through mplhep's own helper.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")
# Scalar tick formatter with math-text exponents and power limits (-3, 3).
# NOTE(review): not referenced by any other cell visible in this notebook.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))
# Applied after the style sheets so this font size is the one left in rcParams.
plt.rcParams.update({"font.size": 20})

In [None]:
# Base directory (relative path) that the output location is built from.
MAIN_DIR = "../../../"

# Directory that plot_tests() saves its PDFs into.
plot_dir = f"{MAIN_DIR}/plots/FTests/23May14"
# Create the directory and any missing parents without spawning a shell.
# Unlike `mkdir -p` via os.system, a failure raises instead of being silently ignored.
os.makedirs(plot_dir, exist_ok=True)

In [None]:
def p_value(data_ts: float, toy_ts: List[float]) -> float:
    """Fraction of toy test statistics that are at least as large as the observed one.

    Args:
        data_ts: Test statistic observed in data.
        toy_ts: Test statistics of the toys; a list, tuple or array of numbers.

    Returns:
        The mean of ``toy_ts >= data_ts`` (``nan`` if ``toy_ts`` is empty).
    """
    # Coerce to an array so the comparison is element-wise for plain lists as well;
    # ``list >= float`` raises TypeError.
    toys = np.asarray(toy_ts)
    return np.mean(toys >= data_ts)


def F_statistic(
    ts_low: List[float],
    ts_high: List[float],
    ord_low: int,
    ord_high: int,
    num_bins: int = 10 * 14,
    dim: int = 2,
):
    """Compute the F-statistic comparing a lower-order fit with a higher-order fit.

    The value returned is::

        ((ts_low - ts_high) / (ord_high - ord_low)) / (ts_high / (num_bins - (ord_high + dim)))

    Args:
        ts_low: Test statistic(s) of the lower-order fit; a scalar, list or array.
        ts_high: Test statistic(s) of the higher-order fit, matching ``ts_low`` in shape.
        ord_low: Order of the lower-order fit.
        ord_high: Order of the higher-order fit; must be greater than ``ord_low``.
        num_bins: Number of bins entering the denominator.
        dim: Offset added to ``ord_high`` in the denominator.

    Returns:
        The F-statistic, element-wise for array-like inputs and a scalar for scalar inputs.

    Raises:
        ValueError: If ``ord_high <= ord_low`` or ``num_bins - (ord_high + dim) <= 0``;
            both would make a divisor zero or flip the sign of the statistic.
    """
    order_gap = ord_high - ord_low
    if order_gap <= 0:
        raise ValueError(f"ord_high ({ord_high}) must be greater than ord_low ({ord_low})")

    dof_high = num_bins - (ord_high + dim)
    if dof_high <= 0:
        raise ValueError(
            f"num_bins - (ord_high + dim) must be positive, got {dof_high}"
        )

    # Coerce so that plain Python lists subtract and divide element-wise.
    ts_low = np.asarray(ts_low)
    ts_high = np.asarray(ts_high)

    numerator = (ts_low - ts_high) / order_gap
    denominator = ts_high / dof_high

    return numerator / denominator

Nonresonant

In [None]:
# Directory with one `nTF_<order>` sub-directory of combine GoodnessOfFit outputs per order.
local_cards_dir = "/uscms/home/rkansal/hhcombine/cards/f_tests/23May14/"
test_orders = [0]
test_statistics = {}

for o1 in test_orders:
    tlabel = f"{o1}"
    # Per tested order: GoF statistics ("toys", "data") and F-statistics ("ftoys", "fdata"),
    # each keyed by the label of the order that was fitted.
    tdict = {kind: {} for kind in ("toys", "data", "ftoys", "fdata")}

    # The tested order is read first: the F-statistics of order o1 + 1 need its entries.
    for nTF in (o1, o1 + 1):
        tflabel = f"{nTF}"
        order_dir = f"{local_cards_dir}/nTF_{nTF}"

        # toy test statistics, from the `Toys<tested order>` output of the order-nTF fit
        toys_tree = uproot.concatenate(
            f"{order_dir}/higgsCombineToys{tlabel}.GoodnessOfFit.mH125.444.root"
        )
        tdict["toys"][tflabel] = np.array(toys_tree["limit"])

        # data test statistic: first entry of the "limit" branch
        data_tree = uproot.concatenate(f"{order_dir}/higgsCombineData.GoodnessOfFit.mH125.root")
        tdict["data"][tflabel] = data_tree["limit"][0]

        if nTF == o1:
            # nothing to compare the tested order against itself
            continue

        tdict["ftoys"][tflabel] = F_statistic(
            tdict["toys"][tlabel], tdict["toys"][tflabel], o1, nTF
        )
        tdict["fdata"][tflabel] = F_statistic(
            tdict["data"][tlabel], tdict["data"][tflabel], o1, nTF
        )

    test_statistics[tlabel] = tdict

Resonant

In [None]:
# NOTE: this cell rebinds `local_cards_dir` and `test_statistics`, which the Nonresonant
# loading cell also assigns; whichever loader ran last determines what the plotting cells see.
eos_cards_dir = "/eos/uscms/store/user/rkansal/bbVV/cards/f_tests/23May2/"
local_cards_dir = "/uscms/home/rkansal/hhcombine/cards/f_tests/23May2/"
test_orders = [(0, 0), (1, 0), (0, 1), (1, 1), (2, 1), (1, 2), (2, 0), (0, 2)]
# test_orders = [(3, 1), (2, 2)]
test_statistics = {}

for o1, o2 in test_orders:
    tlabel = f"{o1}{o2}"
    # Per tested order: GoF statistics ("toys", "data") and F-statistics ("ftoys", "fdata"),
    # each keyed by the label of the order that was fitted.
    tdict = {kind: {} for kind in ("toys", "data", "ftoys", "fdata")}

    # The tested order is read first: the F-statistics of the two higher orders need its
    # entries. (Iterating over [(o1, o2)] alone would load only the tested order.)
    for nTF1, nTF2 in ((o1, o2), (o1 + 1, o2), (o1, o2 + 1)):
        tflabel = f"{nTF1}{nTF2}"
        order_subdir = f"nTF1_{nTF1}_nTF2_{nTF2}"

        # toy test statistics, from the `Toys<tested order>` outputs of the (nTF1, nTF2) fit;
        # the glob collects every matching seed file under the EOS directory
        toys_tree = uproot.concatenate(
            f"{eos_cards_dir}/{order_subdir}/higgsCombineToys{tlabel}Seed*44.GoodnessOfFit.mH125.*44.root"
        )
        tdict["toys"][tflabel] = np.array(toys_tree["limit"])

        # data test statistic: first entry of the "limit" branch, read from the local directory
        data_tree = uproot.concatenate(
            f"{local_cards_dir}/{order_subdir}/higgsCombineData.GoodnessOfFit.mH125.root"
        )
        tdict["data"][tflabel] = data_tree["limit"][0]

        if (nTF1, nTF2) == (o1, o2):
            # nothing to compare the tested order against itself
            continue

        tdict["ftoys"][tflabel] = F_statistic(
            tdict["toys"][tlabel], tdict["toys"][tflabel], o1 + o2, nTF1 + nTF2
        )
        tdict["fdata"][tflabel] = F_statistic(
            tdict["data"][tlabel], tdict["data"][tflabel], o1 + o2, nTF1 + nTF2
        )

    test_statistics[tlabel] = tdict

In [None]:
from scipy import stats


def plot_tests(
    data_ts: float,
    toy_ts: np.ndarray,
    name: str,
    title: str = None,
    bins: int = 15,
    fit: str = None,
    fdof2: int = None,
):
    """Plot the toy test-statistic distribution with the observed value and save it as a PDF.

    Draws a step histogram of ``toy_ts`` between 0 and the largest of the toy and data
    values, marks ``data_ts`` with a vertical line whose legend entry shows the p-value,
    and optionally overlays a reference curve. The figure is written to
    ``{plot_dir}/{name}.pdf`` using the module-level ``plot_dir``.

    Args:
        data_ts: Test statistic observed in data.
        toy_ts: Test statistics of the toys.
        name: File name (without extension) of the saved PDF.
        title: Optional plot title.
        bins: Number of histogram bins.
        fit: ``None`` for no curve, ``"chi2"`` to fit a chi-square distribution to the toys
            (``df`` bounded to (0, 200)), or ``"f"`` to draw an F-distribution with
            ``(1, fdof2)`` degrees of freedom.
        fdof2: Second degree of freedom of the F-distribution; required when ``fit="f"``.

    Raises:
        ValueError: If ``fit`` is not one of the supported values, or ``fit="f"`` is
            requested without ``fdof2``.
    """
    # Validate the options first so that bad arguments fail before any figure is created.
    if fit not in (None, "chi2", "f"):
        raise ValueError("Invalid fit")
    if fit == "f" and fdof2 is None:
        raise ValueError("fdof2 must be given when fit='f'")

    toy_ts = np.asarray(toy_ts)
    plot_min = 0
    plot_max = max(np.max(toy_ts), data_ts)
    pval = p_value(data_ts, toy_ts)

    plt.figure(figsize=(12, 8))
    h = plt.hist(
        toy_ts,
        np.linspace(plot_min, plot_max, bins + 1),
        color="#8C8C8C",
        histtype="step",
        label=f"{len(toy_ts)} Toys",
    )
    plt.axvline(data_ts, color="#FF502E", linestyle=":", label=rf"Data ($p$-value = {pval:.2f})")

    if fit is not None:
        # Start slightly above plot_min so the pdf is not evaluated exactly at 0.
        x = np.linspace(plot_min + 0.01, plot_max, 100)

        if fit == "chi2":
            res = stats.fit(stats.chi2, toy_ts, [(0, 200)])
            pdf = stats.chi2.pdf(x, res.params.df)
            label = rf"$\chi^2_{{DoF = {res.params.df:.2f}}}$ Fit"
        else:  # fit == "f", guaranteed by the validation above
            pdf = stats.f.pdf(x, 1, fdof2)
            label = rf"$F-dist_{{DoF = (1, {fdof2})}}$"

        # Rescale the curve so its peak matches the tallest histogram bin; only the
        # shapes are meant to be compared.
        plt.plot(
            x,
            pdf * (np.max(h[0]) / np.max(pdf)),
            color="#1f78b4",
            linestyle="--",
            label=label,
        )

    hep.cms.label(
        "Work in Progress",
        data=True,
        lumi=138,
        year=None,
    )

    _ = plt.legend()
    plt.title(title)
    plt.ylabel("Number of Toys")
    plt.xlabel("Test Statistics")

    plt.savefig(f"{plot_dir}/{name}.pdf", bbox_inches="tight")

Nonresonant

In [None]:
# NOTE: expects `test_statistics` as filled by the Nonresonant loading cell. The Resonant
# loading cell rebinds that name, so re-run the Nonresonant loader first if it ran since.
o1 = 0  # order being tested
tlabel = f"{o1}"

# Goodness-of-fit of the tested order, with a chi-square fit to the toys overlaid.
data_ts, toy_ts = test_statistics[tlabel]["data"][tlabel], test_statistics[tlabel]["toys"][tlabel]
plot_tests(data_ts, toy_ts, "gof" + tlabel, fit="chi2", bins=20)

# F-test of the tested order against the next order up.
ord1 = 1
tflabel = f"{ord1}"
data_ts, toy_ts = (
    test_statistics[tlabel]["fdata"][tflabel],
    test_statistics[tlabel]["ftoys"][tflabel],
)
plot_tests(data_ts, toy_ts, f"f{tlabel}_{tflabel}", title=f"{o1} vs. {ord1}")

Resonant

In [None]:
# NOTE: expects `test_statistics` as filled by the Resonant loading cell, and (o1, o2) to be
# one of the orders that cell looped over.
o1, o2 = 2, 0  # order being tested
tlabel = f"{o1}{o2}"

# Goodness-of-fit of the tested order, with a chi-square fit to the toys overlaid.
data_ts, toy_ts = test_statistics[tlabel]["data"][tlabel], test_statistics[tlabel]["toys"][tlabel]
plot_tests(data_ts, toy_ts, "gof" + tlabel, fit="chi2", bins=20)

# F-tests of the tested order against each of the two orders one step higher.
for ord1, ord2 in [[o1 + 1, o2], [o1, o2 + 1]]:
    tflabel = f"{ord1}{ord2}"
    data_ts, toy_ts = (
        test_statistics[tlabel]["fdata"][tflabel],
        test_statistics[tlabel]["ftoys"][tflabel],
    )
    plot_tests(data_ts, toy_ts, f"f{tlabel}_{tflabel}", title=f"({o1}, {o2}) vs. ({ord1}, {ord2})")