In [None]:
from __future__ import annotations

from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import mplhep as hep
import numpy as np
import uproot

# Apply the CMS plotting style. NOTE(review): both calls below appear to select the
# same mplhep CMS style (once via matplotlib, once via mplhep's helper) — confirm
# whether one of them is redundant.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")
# Math-text scalar tick formatter with power limits (-3, 3).
# NOTE(review): `formatter` is not attached to any axis in the cells visible here.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))
# Set the font size after the style has been applied so it is not overridden.
plt.rcParams.update({"font.size": 20})

In [None]:
# Base directory, four levels above the notebook's working directory
# (presumably the repository root — confirm).
MAIN_DIR = Path("../../../../")

# Output folder for the bias-test figures; created together with any missing parents.
plot_dir = MAIN_DIR.joinpath("plots/BiasTest/24Apr8NonresVBFNodEta")
plot_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Selects which bias-test configuration (resonant or nonresonant) the notebook reads.
resonant = False

# Range of the signal strength r used as the parameter boundary in the checks below;
# identical for both configurations. (An alternative range, [-1, 20], was previously
# tried for the resonant case.)
r_bounds = [-15, 15]

if resonant:
    cards_dir = "23Sep22Scale100_bgs_only_scaleqcdsqrt"
    bias_dir = "23Sep22"

    # Mass points to inspect; the commented entries are other available samples.
    samples = [
        # "NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190",
        "NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125",
        # "NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250",
    ]

    # Injected signal strengths for which toys were fitted.
    biases = [0.0, 0.15, 0.3, 1.0]
else:
    cards_dir = "24Apr8VBFBDTScanNodEta/txbb_HP_bdt_0.999_lepton_veto_Hbb"
    bias_dir = "24Apr8"

    samples = ["k2v0"]

    # Injected signal strengths for which toys were fitted.
    biases = [0.0, 0.3, 1.0, 1.5]

In [None]:
# Fit results keyed as r_dict[sample][bias] -> dict of per-toy arrays
# ("r", "reldiff", "neg_lim", "pos_lim").
r_dict = {}

# Location of the combine cards that hold the FitDiagnostics outputs.
cards_base = "/uscms/home/rkansal/hhcombine/cards"

for sample in samples:
    sample_results = r_dict.setdefault(sample, {})

    for bias in biases:
        # Skip (sample, bias) points that were already loaded (e.g. duplicated entries).
        if bias in sample_results:
            continue

        print(sample, bias)
        if not resonant:
            file_names = f"{cards_base}/{cards_dir}/bias/{bias_dir}/higgsCombinebias{bias}.FitDiagnostics.mH125.*.root"
        else:
            file_names = f"{cards_base}/biastests/{cards_dir}/{sample}/bias/{bias_dir}/higgsCombinebias{bias}.FitDiagnostics.mH125.*.root"
        file = uproot.concatenate(file_names)

        # The `limit` branch is read in groups of four entries per toy:
        # entry 0 is r, entry 1 the lower edge, entry 2 the upper edge
        # (entry 3 is not used here). Convert once and slice.
        limits = np.array(file.limit)
        r = limits[::4]
        neg_lim = limits[1::4]
        pos_lim = limits[2::4]
        r_negerr = r - neg_lim
        r_poserr = pos_lim - r

        # Pull: (r - injected) divided by the uncertainty on the side facing the
        # injected value. The sign masks are fixed *before* any division so that an
        # entry whose sign flips after the first division (negative upper error) is
        # never normalised a second time.
        diff = r - bias
        below = diff < 0
        above = diff > 0
        reldiff = diff.copy()
        reldiff[below] = diff[below] / r_poserr[below]
        reldiff[above] = diff[above] / r_negerr[above]

        sample_results[bias] = {
            "r": r,
            "reldiff": reldiff,
            "neg_lim": neg_lim,
            "pos_lim": pos_lim,
        }

In [None]:
# Report, per injected r, the share of fits whose lower / upper interval edge sits at
# the parameter boundary, i.e. where the reported uncertainty is unreliable.
# np.isclose is used (rather than exact float equality) so the count agrees with the
# boundary masks built in the later cells.
for sample in samples:
    print(sample)
    for bias in biases:
        toys = r_dict[sample][bias]
        num_toys = len(toys["r"])

        pct_at_lower = np.sum(np.isclose(toys["neg_lim"], r_bounds[0])) / num_toys * 100
        pct_at_upper = np.sum(np.isclose(toys["pos_lim"], r_bounds[1])) / num_toys * 100

        print(
            f"For r = {bias}, # of successful fits: = {num_toys}, {pct_at_lower:.0f}% of these with r- = {r_bounds[0]}, {pct_at_upper:.0f}% with r+ = {r_bounds[1]}"
        )

In [None]:
# Count the fits whose interval edge is at the parameter boundary on the side that
# enters the pull (upper edge when r is below the injected value, lower edge when
# above), plus the fits whose two edges coincide.
for sample in samples:
    print(sample)
    for bias in biases:
        toys = r_dict[sample][bias]
        num_toys = len(toys["r"])

        upper_side_at_bound = (toys["reldiff"] < 0) & np.isclose(toys["pos_lim"], r_bounds[1])
        lower_side_at_bound = (toys["reldiff"] > 0) & np.isclose(toys["neg_lim"], r_bounds[0])
        r_lims_bounds = upper_side_at_bound | lower_side_at_bound

        # Identical lower and upper edges: the interval is degenerate.
        r_lims_wrong = toys["pos_lim"] == toys["neg_lim"]

        tot_pfail = np.sum(r_lims_bounds | r_lims_wrong)

        print(
            f"For r = {bias}, # of successful fits: = {num_toys}, {tot_pfail / num_toys * 100:.0f}% of these with r-lim at boundary"
        )

In [None]:
from scipy import stats

# Pull histograms cover [-xrange, xrange] with `bins` equal-width bins; `x` is the
# grid on which the Gaussian overlay is evaluated.
xrange = 3
bins = 21
x = np.linspace(-xrange, xrange, 101)
bin_edges = np.linspace(-xrange, xrange, bins + 1)
bin_width = 2 * xrange / bins

for sample in samples:
    # One panel per injected signal strength, stacked vertically.
    fig, axs = plt.subplots(len(biases), 1, figsize=(12, len(biases) * 10))

    for i, bias in enumerate(biases):
        toys = r_dict[sample][bias]

        # Fits whose interval edge on the side used for the pull is at the boundary.
        r_lims_bounds = ((toys["reldiff"] < 0) & np.isclose(toys["pos_lim"], r_bounds[1])) | (
            (toys["reldiff"] > 0) & np.isclose(toys["neg_lim"], r_bounds[0])
        )
        # Fits whose lower and upper edges coincide.
        r_lims_same = toys["pos_lim"] == toys["neg_lim"]
        fit_fail = r_lims_bounds | r_lims_same

        # Keep good fits, then only the pulls that fall inside the plotted window.
        reldiff = toys["reldiff"][~fit_fail]
        reldiff = reldiff[(reldiff > -xrange) & (reldiff < xrange)]

        mu, sigma = np.mean(reldiff), np.std(reldiff)

        # plt.subplots returns a bare Axes (not an array) when there is a single panel.
        ax = axs[i] if len(biases) > 1 else axs

        ax.hist(reldiff, bin_edges, histtype="step")
        ax.plot(
            x,
            # Scale the unit-area pdf to expected counts per bin. Use the number of
            # histogrammed entries (after the window cut), which is also the sample
            # mu and sigma were computed from, so curve and histogram share the
            # same normalisation.
            stats.norm.pdf(x, loc=mu, scale=sigma) * len(reldiff) * bin_width,
            label=rf"$\mu = {mu:.2f}, \sigma = {sigma:.2f}$",
        )
        ax.set_xlabel(rf"$\frac{{\hat{{r}} - {bias}}}{{\Delta \hat r}}$")
        ax.set_ylabel("Number of toys")
        ax.set_title(f"r = {bias}")
        ax.legend()

        hep.cms.label(
            "Preliminary",
            ax=ax,
            data=True,
            lumi=138,
            year=None,
        )

    fig.savefig(f"{plot_dir}/{sample}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# Inspect one (sample, injected r) point in detail. Take the first configured sample
# instead of a hard-coded resonant name: with `resonant = False` the resonant key does
# not exist in r_dict and the lookup would raise KeyError. In the resonant
# configuration samples[0] is the same sample that was hard-coded before.
sample = samples[0]
bias = 0.0

toys = r_dict[sample][bias]

# Fits whose interval edge on the side used for the pull is at the parameter boundary.
r_lims_bounds = ((toys["reldiff"] < 0) & np.isclose(toys["pos_lim"], r_bounds[1])) | (
    (toys["reldiff"] > 0) & np.isclose(toys["neg_lim"], r_bounds[0])
)

# Fits whose lower and upper edges coincide.
r_lims_same = toys["pos_lim"] == toys["neg_lim"]

fit_fail = r_lims_bounds | r_lims_same

# Per-toy quantities restricted to the fits that passed both checks.
preldiff = toys["reldiff"][~fit_fail]
pposlim = toys["pos_lim"][~fit_fail]
pneglim = toys["neg_lim"][~fit_fail]
pr = toys["r"][~fit_fail]

# Narrow pull window to look at: -0.7 <= pull <= -0.6.
select = (preldiff >= -0.7) & (preldiff <= -0.6)

pos_lim = pposlim[select]
pos_lim

In [None]:
# r values of the passing fits that fall inside the `select` pull window.
pr[select]

In [None]:
# Pull values of the passing fits that fall inside the `select` pull window.
preldiff[select]

In [None]:
# Distribution of the upper interval edge for all toys of this sample/bias
# (no fit-failure mask applied).
plt.hist(r_dict[sample][bias]["pos_lim"])

In [None]:
# Distribution of the lower interval edge for all toys of this sample/bias
# (no fit-failure mask applied).
plt.hist(r_dict[sample][bias]["neg_lim"])

In [None]:
# Keep the selected r values under the global name `r` and display them.
# Note: this rebinds `r`, which earlier cells also used as a loop-local array.
r = pr[select]
r

In [None]:
# Distribution of r for all toys of this sample/bias (no fit-failure mask applied).
plt.hist(r_dict[sample][bias]["r"])

In [None]:
# Lower interval edge of the passing fits inside the `select` pull window.
neg_lim = pneglim[select]
neg_lim

In [None]:
# Histogram of the lower interval edge for the selected toys only.
plt.hist(neg_lim)

In [None]:
from ipywidgets import FloatSlider, interact


def phist(cut):
    """Draw a step histogram of ``pneglim`` for the toys whose pull exceeds ``cut``.

    Reads the module-level arrays ``pneglim`` and ``preldiff`` and draws on the
    current pyplot axes, using 20 equal-width bins spanning [-0.5, 0.5].
    """
    above_cut = preldiff > cut
    edges = np.linspace(-0.5, 0.5, 21)
    plt.hist(pneglim[above_cut], edges, histtype="step")


# Interactive slider for the `cut` argument of `phist`: from -1 to 5 in steps of 0.1.
interact(phist, cut=FloatSlider(min=-1, max=5, step=0.1))

In [None]:
# Pulls of the selected toys: `preldiff` already holds the pulls of the fits that
# passed the fit-failure mask, so only the pull-window selection is applied here.
reldiff = preldiff[select]
reldiff