In [None]:
import awkward as ak
import numpy as np
import hist
from hist import Hist
import matplotlib.pyplot as plt
import mplhep as hep
import pickle
import math
import os
from hist.intervals import clopper_pearson_interval
import utils

# Global plotting defaults for the whole notebook.
# NOTE(review): the CMS style is applied *after* the font-size update below; if that
# style defines "font.size" itself it will override the 16 set here — confirm the order.
plt.rcParams.update({"font.size": 16})
plt.style.use(hep.style.CMS)

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Directory into which every figure of this notebook is written.
plot_dir = "../../../plots/TriggerSFs/24Feb15"
# Create the directory (and any missing parents) without spawning a shell;
# a failure now raises instead of being silently ignored.
os.makedirs(plot_dir, exist_ok=True)

In [None]:
# Load one pickled object per year from trigEffs/ into `combined`, keyed by year.
years = ["2016APV", "2016", "2017", "2018"]
combined = {}

for year in years:
    pkl_path = f"trigEffs/{year}_combined.pkl"
    # NOTE: unpickling can execute arbitrary code — only open files produced by this analysis.
    with open(pkl_path, "rb") as pkl_file:
        combined[year] = pickle.load(pkl_file)

Merge the two highest $p_T$ bins and re-save (requested by the Higgs conveners)

In [None]:
# Disabled rebin-and-resave step. WARNING: if re-enabled, this rebins the "den" and "num"
# entries along "jet1pt" and "jet1msd" and then overwrites trigEffs/{year}_combined.pkl
# in place.
# for year in years:
#     for h in ["den", "num"]:
#         combined[year][h] = utils.rebin_hist(
#             combined[year][h], "jet1pt", [250, 275, 300, 325, 350, 375, 400, 450, 500, 600, 1000]
#         )
#         combined[year][h] = utils.rebin_hist(
#             combined[year][h], "jet1msd", list(range(0, 241, 20)) + [300]
#         )

#     with open(f"trigEffs/{year}_combined.pkl", "wb") as filehandler:
#         pickle.dump(combined[year], filehandler)

In [None]:
# Per-year efficiency (num / den), once with the second axis summed out and once with
# the first axis summed out.
effs_txbb = {}
effs_th4q = {}

for year in years:
    num, den = combined[year]["num"], combined[year]["den"]
    # Keep axis 0, sum over axis 1.
    effs_txbb[year] = num[:, sum, :, :] / den[:, sum, :, :]
    # Sum over axis 0, keep axis 1.
    effs_th4q[year] = num[sum, :, :, :] / den[sum, :, :, :]

In [None]:
# Clopper-Pearson intervals for both projections, plus a symmetrised error defined as
# half the width of each interval.
errors_txbb, intervals_txbb = {}, {}
errors_th4q, intervals_th4q = {}, {}

for year in years:
    num = combined[year]["num"]
    den = combined[year]["den"]

    # Projection that keeps axis 0 and sums over axis 1 (flow bins excluded).
    intervals = clopper_pearson_interval(
        num[:, sum, :, :].view(flow=False),
        den[:, sum, :, :].view(flow=False),
    )
    intervals_txbb[year] = intervals
    errors_txbb[year] = (intervals[1] - intervals[0]) / 2

    # Projection that sums over axis 0 and keeps axis 1 (flow bins excluded).
    intervals = clopper_pearson_interval(
        num[sum, :, :, :].view(flow=False),
        den[sum, :, :, :].view(flow=False),
    )
    intervals_th4q[year] = intervals
    errors_th4q[year] = (intervals[1] - intervals[0]) / 2

## Plotting

In [None]:
def mesh2d(
    ax,
    xbins,
    ybins,
    vals,
    vmax,
    title,
    year,
    xlabel=r"Jet 1 MassSD (GeV)",
    ylabel=r"Jet 1 $p_T$ (GeV)",
    fontsize=28,
):
    """Draw ``vals`` as a colour mesh on ``ax`` and print each non-NaN value in its cell.

    Args:
        ax: axes to draw on.
        xbins: bin edges along x; ``vals`` is indexed ``[iy, ix]``.
        ybins: bin edges along y.
        vals: 2D array of values, one per (y bin, x bin).
        vmax: upper limit of the colour scale (the lower limit is 0).
        title: axes title.
        year: key into ``utils.LUMI``; also passed to the CMS label.
        xlabel: x-axis label.
        ylabel: y-axis label.
        fontsize: font size of the per-cell annotations.

    Returns:
        The object returned by ``ax.pcolormesh``.
    """
    mesh = ax.pcolormesh(xbins, ybins, vals, cmap="turbo", vmin=0, vmax=vmax)

    # Values strictly inside this window get black text; everything else gets white.
    dark_lo, dark_hi = 0.1 * vmax, 0.9 * vmax

    for iy in range(len(ybins) - 1):
        y_centre = (ybins[iy] + ybins[iy + 1]) / 2
        for ix in range(len(xbins) - 1):
            val = vals[iy, ix]
            if math.isnan(val):
                continue

            text_colour = "black" if dark_lo < val < dark_hi else "white"
            ax.text(
                (xbins[ix] + xbins[ix + 1]) / 2,
                y_centre,
                val.round(2),
                color=text_colour,
                ha="center",
                va="center",
                fontsize=fontsize,
            )

    ax.set_title(title, x=0.35, y=1.005)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    # utils.LUMI[year] is divided by 1e3 before being handed to the label.
    hep.cms.label(ax=ax, data=True, year=year, lumi=utils.LUMI[year] / 1e3)

    return mesh


def plot_all_wrapper(tbins, xbins, ybins, w, vmax, year, name, bins="txbb"):
    """Plot the first four slices of ``w`` on a 2x2 grid and save the figure as a PDF.

    Args:
        tbins: edges of the binning that the four panels step through; panel ``k`` is
            titled with ``tbins[k]`` and ``tbins[k + 1]``.
        xbins: x bin edges forwarded to ``mesh2d``.
        ybins: y bin edges forwarded to ``mesh2d``.
        w: array-like whose entries ``w[0]`` .. ``w[3]`` are the 2D maps to draw.
        vmax: upper limit of the colour scale.
        year: forwarded to ``mesh2d``.
        name: file stem; the figure is written to ``{plot_dir}/{name}.pdf``.
        bins: ``"txbb"`` or ``"pt"``; selects the panel-title format.

    Raises:
        ValueError: if ``bins`` is not one of the supported options. Previously an
            unknown value only failed later, when the unset title was first used.
    """
    if bins not in ("txbb", "pt"):
        raise ValueError(f"Unsupported bins option {bins!r}; expected 'txbb' or 'pt'.")

    # NOTE: this changes the font size globally, i.e. also for every figure drawn
    # after this function has been called.
    plt.rcParams.update({"font.size": 36})
    fig, axs = plt.subplots(2, 2, figsize=(18 * 2, 17 * 2), constrained_layout=True)

    for k in range(4):
        ax = axs[k // 2][k % 2]

        if bins == "txbb":
            title = rf"{tbins[k]:.2f} < $T_{{Xbb}}$ < {tbins[k + 1]:.2f}"
        else:
            title = rf"{tbins[k]:.0f} < Jet 2 $p_T$ < {tbins[k + 1]:.0f} GeV"

        mesh = mesh2d(ax, xbins, ybins, w[k], vmax, title, year, fontsize=28)

    # One colour bar per row; all panels share the same colour scale (0..vmax),
    # so the mesh of the last panel is representative.
    for i in range(2):
        fig.colorbar(mesh, ax=axs[i].ravel().tolist(), pad=0.01)

    # Save through the figure handle rather than relying on the "current figure".
    fig.savefig(f"{plot_dir}/{name}.pdf", bbox_inches="tight")

### Plot all tagger bins in the same figure

Txbb bins

In [None]:
# Efficiencies: one 2x2 figure per year, closed right after it has been saved.
for year in years:
    w, jet1txbbbins, jet1ptbins, jet1msdbins = effs_txbb[year].to_numpy()
    plot_all_wrapper(
        tbins=jet1txbbbins,
        xbins=jet1msdbins,
        ybins=jet1ptbins,
        w=w,
        vmax=1,
        year=year,
        name=f"{year}_txbb_effs",
    )
    plt.close()

In [None]:
# Errors: same layout as the efficiency figures, with the colour scale capped at 0.5.
for year in years:
    # Only the bin edges are needed from the efficiency histogram.
    _, jet1txbbbins, jet1ptbins, jet1msdbins = effs_txbb[year].to_numpy()
    w = errors_txbb[year]
    plot_all_wrapper(
        tbins=jet1txbbbins,
        xbins=jet1msdbins,
        ybins=jet1ptbins,
        w=w,
        vmax=0.5,
        year=year,
        name=f"{year}_txbb_errs",
    )
    plt.close()

TH4q bins

In [None]:
# One 2x2 figure per year: each panel shows the efficiency map of one TH4q bin, with
# the value of every non-NaN cell printed on top of it.
for year in years:
    w, jet1th4qbins, jet1ptbins, jet1msdbins = effs_th4q[year].to_numpy()

    fig, axs = plt.subplots(2, 2, figsize=(18 * 2, 14 * 2))

    for k in range(len(jet1th4qbins) - 1):
        # Fill the grid row by row.
        ax = axs[k // 2][k % 2]
        eff = w[k]

        mesh = ax.pcolormesh(jet1msdbins, jet1ptbins, eff, cmap="turbo", vmin=0, vmax=1)
        for i in range(len(jet1ptbins) - 1):
            pt_centre = (jet1ptbins[i] + jet1ptbins[i + 1]) / 2
            for j in range(len(jet1msdbins) - 1):
                val = eff[i, j]
                if math.isnan(val):
                    continue
                ax.text(
                    (jet1msdbins[j] + jet1msdbins[j + 1]) / 2,
                    pt_centre,
                    val.round(2),
                    color="black" if 0.1 < val < 0.9 else "white",
                    ha="center",
                    va="center",
                    fontsize=12,
                )

        lo_edge, hi_edge = jet1th4qbins[k], jet1th4qbins[k + 1]
        ax.set_title(f"{year}: {lo_edge:.2f} < $T_{{H4q}}$ Score < {hi_edge:.2f}")
        ax.set_xlabel("Jet 1 MassSD (GeV)")
        ax.set_ylabel("Jet 1 $p_T$ (GeV)")
        plt.colorbar(mesh, ax=ax)

    plt.savefig(f"{plot_dir}/{year}_th4q_effs.pdf", bbox_inches="tight")
    # plt.close()

In [None]:
fig, axs = plt.subplots(1, 4, figsize=(18 * 4, 14), gridspec_kw={"wspace": 0.05})

# Index of the Txbb bin whose efficiency map is drawn, and of the bin it is compared to.
shown_bin, compared_bin = 3, 2

for iyear, year in enumerate(years):
    ax = axs[iyear]
    w, jet1txbbbins, jet1ptbins, jet1msdbins = effs_txbb[year].to_numpy()

    lower, upper = intervals_txbb[year][0], intervals_txbb[year][1]
    # Non-zero where the intervals of the two bins overlap.
    overlap = (lower[shown_bin] <= upper[compared_bin]) * (
        upper[shown_bin] >= lower[compared_bin]
    )

    eff = w[shown_bin]
    mesh = ax.pcolormesh(jet1msdbins, jet1ptbins, eff, cmap="turbo", vmin=0, vmax=1)

    # Dedicated index names: the original inner loop reused `i`, shadowing the year index.
    for ipt in range(len(jet1ptbins) - 1):
        for imsd in range(len(jet1msdbins) - 1):
            val = eff[ipt, imsd]
            if math.isnan(val) or overlap[ipt, imsd] != 0:
                continue
            # Mark cells whose intervals do NOT overlap. The text colour is chosen from
            # the map that is actually drawn (previously it read `w[k]`, with `k` left
            # over from an earlier cell).
            ax.text(
                (jet1msdbins[imsd] + jet1msdbins[imsd + 1]) / 2,
                (jet1ptbins[ipt] + jet1ptbins[ipt + 1]) / 2,
                "X",
                color="black" if 0.1 < val < 0.9 else "white",
                ha="center",
                va="center",
                fontsize=12,
            )

    ax.set_title(f"{year}: $T_{{Xbb}}$ Intervals Overlap")
    ax.set_xlabel("Jet 1 MassSD (GeV)")
    ax.set_ylabel("Jet 1 $p_T$ (GeV)")
    plt.colorbar(mesh, ax=ax)

fig.savefig(f"{plot_dir}/txbb_overlap.pdf", bbox_inches="tight")

In [None]:
fig, axs = plt.subplots(1, 4, figsize=(18 * 4, 14), gridspec_kw={"wspace": 0.05})

# Index of the TH4q bin whose efficiency map is drawn, and of the bin it is compared to.
shown_bin, compared_bin = 3, 2

for iyear, year in enumerate(years):
    ax = axs[iyear]
    w, jet1th4qbins, jet1ptbins, jet1msdbins = effs_th4q[year].to_numpy()

    lower, upper = intervals_th4q[year][0], intervals_th4q[year][1]
    # Non-zero where the intervals of the two bins overlap.
    overlap = (lower[shown_bin] <= upper[compared_bin]) * (
        upper[shown_bin] >= lower[compared_bin]
    )

    eff = w[shown_bin]
    mesh = ax.pcolormesh(jet1msdbins, jet1ptbins, eff, cmap="turbo", vmin=0, vmax=1)

    # Dedicated index names: the original inner loop reused `i`, shadowing the year index.
    for ipt in range(len(jet1ptbins) - 1):
        for imsd in range(len(jet1msdbins) - 1):
            val = eff[ipt, imsd]
            if math.isnan(val) or overlap[ipt, imsd] != 0:
                continue
            # Mark cells whose intervals do NOT overlap. The text colour is chosen from
            # the map that is actually drawn (previously it read `w[k]`, with `k` left
            # over from an earlier cell).
            ax.text(
                (jet1msdbins[imsd] + jet1msdbins[imsd + 1]) / 2,
                (jet1ptbins[ipt] + jet1ptbins[ipt + 1]) / 2,
                "X",
                color="black" if 0.1 < val < 0.9 else "white",
                ha="center",
                va="center",
                fontsize=12,
            )

    ax.set_title(f"{year}: $T_{{H4q}}$ Intervals Overlap")
    ax.set_xlabel("Jet 1 MassSD (GeV)")
    ax.set_ylabel("Jet 1 $p_T$ (GeV)")
    plt.colorbar(mesh, ax=ax)

fig.savefig(f"{plot_dir}/th4q_overlap.pdf", bbox_inches="tight")

### Plot per Txbb bin

In [None]:
# One standalone figure per (year, Txbb bin): efficiency map with the value of every
# non-NaN cell printed on top, saved as a PDF and then closed.
for year in years:
    w, jet1txbbbins, jet1ptbins, jet1msdbins = effs_txbb[year].to_numpy()

    for k in range(len(jet1txbbbins) - 1):
        eff = w[k]
        lo_edge, hi_edge = jet1txbbbins[k], jet1txbbbins[k + 1]

        fig, ax = plt.subplots(figsize=(18, 14))
        mesh = ax.pcolormesh(jet1msdbins, jet1ptbins, eff, cmap="turbo", vmin=0, vmax=1)

        for i in range(len(jet1ptbins) - 1):
            pt_centre = (jet1ptbins[i] + jet1ptbins[i + 1]) / 2
            for j in range(len(jet1msdbins) - 1):
                val = eff[i, j]
                if math.isnan(val):
                    continue
                ax.text(
                    (jet1msdbins[j] + jet1msdbins[j + 1]) / 2,
                    pt_centre,
                    val.round(2),
                    color="black" if 0.1 < val < 0.9 else "white",
                    ha="center",
                    va="center",
                    fontsize=12,
                )

        ax.set_title(f"{year}: {lo_edge:.2f} < $T_{{Xbb}}$ Score < {hi_edge:.2f}")
        ax.set_xlabel("Jet 1 MassSD (GeV)")
        ax.set_ylabel("Jet 1 $p_T$ (GeV)")
        plt.colorbar(mesh, ax=ax)
        plt.savefig(
            f"{plot_dir}/{year}_txbb_{lo_edge:.2f}_{hi_edge:.2f}_effs.pdf",
            bbox_inches="tight",
        )
        plt.close()

In [None]:
# One standalone figure per (year, TH4q bin): efficiency map with the value of every
# non-NaN cell printed on top, saved as a PDF and then closed.
for year in years:
    w, jet1th4qbins, jet1ptbins, jet1msdbins = effs_th4q[year].to_numpy()

    for k in range(len(jet1th4qbins) - 1):
        eff = w[k]
        lo_edge, hi_edge = jet1th4qbins[k], jet1th4qbins[k + 1]

        fig, ax = plt.subplots(figsize=(18, 14))
        mesh = ax.pcolormesh(jet1msdbins, jet1ptbins, eff, cmap="turbo", vmin=0, vmax=1)

        for i in range(len(jet1ptbins) - 1):
            pt_centre = (jet1ptbins[i] + jet1ptbins[i + 1]) / 2
            for j in range(len(jet1msdbins) - 1):
                val = eff[i, j]
                if math.isnan(val):
                    continue
                ax.text(
                    (jet1msdbins[j] + jet1msdbins[j + 1]) / 2,
                    pt_centre,
                    val.round(2),
                    color="black" if 0.1 < val < 0.9 else "white",
                    ha="center",
                    va="center",
                    fontsize=12,
                )

        ax.set_title(f"{year}: {lo_edge:.2f} < $T_{{H4q}}$ Score < {hi_edge:.2f}")
        ax.set_xlabel("Jet 1 MassSD (GeV)")
        ax.set_ylabel("Jet 1 $p_T$ (GeV)")
        plt.colorbar(mesh, ax=ax)
        plt.savefig(
            f"{plot_dir}/{year}_th4q_{lo_edge:.2f}_{hi_edge:.2f}_effs.pdf",
            bbox_inches="tight",
        )
        plt.close()

## 3D efficiencies

In [None]:
# NOTE: from here on `years` and `combined` are rebound; they now refer only to the
# "_4d_combined" pickle(s) loaded below.
years = ["2018"]
combined = {}

for year in years:
    pkl_path = f"trigEffs/{year}_4d_combined.pkl"
    # NOTE: unpickling can execute arbitrary code — only open files produced by this analysis.
    with open(pkl_path, "rb") as pkl_file:
        combined[year] = pickle.load(pkl_file)

In [None]:
# New edges for the "jet2pt" axis.
rebin_pt = [250, 300, 400, 600, 800, 1000]

# Sum out axes 3 and 5, then rebin "jet2pt".
# NOTE: `year` is whatever the preceding load loop left it at (the last entry of `years`).
c_rebinned = {}
for key in ["num", "den"]:
    projected = combined[year][key][:, :, :, sum, :, sum]
    c_rebinned[key] = utils.rebin_hist(projected, "jet2pt", rebin_pt)

In [None]:
# Efficiencies, Clopper-Pearson intervals and half-width errors from the rebinned
# histograms. `c_rebinned` is not keyed by year, so every year gets the same values.
effs = {}
errors = {}
intervals_pt = {}

for year in years:
    effs[year] = c_rebinned["num"] / c_rebinned["den"]

    intervals = clopper_pearson_interval(
        c_rebinned["num"].view(flow=False),
        c_rebinned["den"].view(flow=False),
    )
    intervals_pt[year] = intervals
    errors[year] = (intervals[1] - intervals[0]) / 2

In [None]:
# Last bin of the first axis, with the first bin of the last axis dropped.
w, jet1ptbins, jet1msdbins, jet2ptbins = effs[year][-1, :, :, 1:].to_numpy()
# plot_all_wrapper steps through the leading axis, so bring the jet 2 pT axis to the front.
w_by_jet2pt = np.moveaxis(w, 2, 0)
plot_all_wrapper(
    tbins=jet2ptbins,
    xbins=jet1msdbins,
    ybins=jet1ptbins,
    w=w_by_jet2pt,
    vmax=1,
    year=year,
    name=f"{year}_jet2pt",
    bins="pt",
)

In [None]:
# One 2x2 efficiency figure for each of the bins 1..4 along the last axis.
for i in range(4):
    w, jet1txbbbins, jet1ptbins, jet1msdbins = effs[year][:, :, :, i + 1].to_numpy()
    plot_all_wrapper(
        tbins=jet1txbbbins,
        xbins=jet1msdbins,
        ybins=jet1ptbins,
        w=w,
        vmax=1,
        year=year,
        name=f"{year}_jet2pt_{i}",
        bins="txbb",
    )

In [None]:
# One 2x2 error figure for each of the bins 1..4 along the last axis.
for i in range(4):
    # Only the bin edges are taken from the efficiency histogram.
    _, jet1txbbbins, jet1ptbins, jet1msdbins = effs[year][:, :, :, i + 1].to_numpy()
    w = errors[year][:, :, :, i + 1]
    plot_all_wrapper(
        tbins=jet1txbbbins,
        xbins=jet1msdbins,
        ybins=jet1ptbins,
        w=w,
        vmax=1,
        year=year,
        name=f"{year}_errs_jet2pt_{i}",
        bins="txbb",
    )

Check statistical compatibility between the $T_{Xbb}$-binned and the jet 2 $p_T$-binned efficiency intervals

In [None]:
# Lower/upper interval edges; the txbb ones get a trailing axis so that they broadcast
# against the last axis of the pt-binned ones (whose first entry is dropped).
txbb_lower = intervals_txbb[year][0][..., None]
txbb_upper = intervals_txbb[year][1][..., None]
pt_lower = intervals_pt[year][0][..., 1:]
pt_upper = intervals_pt[year][1][..., 1:]

# True where the two intervals overlap.
comp = (txbb_lower <= pt_upper) & (txbb_upper >= pt_lower)

In [None]:
# Fraction of entries of `comp` that are True.
np.mean(comp)