In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import mplhep as hep
from matplotlib import colors

from boostedhh import utils, hh_vars

In [None]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Base directory, three levels above the notebook's working directory.
MAIN_DIR = Path("../../../")

# Tag of the skimmer output that the loading cells read.
samples_tag = "24Nov9UpdateTriggers_v12_private_signal"

# Where this study's plots and tables are written; created if it does not exist yet.
plot_dir = MAIN_DIR.joinpath("plots/TriggerStudy/24Nov21")
plot_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Skimmed 2022 GluGlutoHHto2B2Tau signal sample for the selected `samples_tag`.
bbtt_parquet_dir = f"/ceph/cms/store/user/rkansal/bbtautau/skimmer/{samples_tag}/2022/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8/parquet/"
df = pd.read_parquet(bbtt_parquet_dir)

In [None]:
# Skimmed 2022 GluGlutoHHto4B signal sample for the selected `samples_tag`.
bbbb_parquet_dir = f"/ceph/cms/store/user/rkansal/bbtautau/skimmer/{samples_tag}/2022/GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8/parquet"
df4b = pd.read_parquet(bbbb_parquet_dir)

In [None]:
# Cutflow of the 2B2Tau signal sample, obtained from the skimmer's pickle outputs via the
# project helper `utils.get_cutflow(directory, year, sample name)`.
bbtt_sample = "GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8"
bbtt_pickles_dir = (
    f"/ceph/cms/store/user/rkansal/bbtautau/skimmer/{samples_tag}/2022/{bbtt_sample}/pickles"
)
cf = utils.get_cutflow(bbtt_pickles_dir, "2022", bbtt_sample)

In [None]:
# Cutflow of the 4B signal sample, obtained from the skimmer's pickle outputs via the
# project helper `utils.get_cutflow(directory, year, sample name)`.
bbbb_sample = "GluGlutoHHto4B_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8"
bbbb_pickles_dir = (
    f"/ceph/cms/store/user/rkansal/bbtautau/skimmer/{samples_tag}/2022/{bbbb_sample}/pickles/"
)
cf4b = utils.get_cutflow(bbbb_pickles_dir, "2022", bbbb_sample)

In [None]:
# Show every column available in the bbtautau frame.
df.columns.tolist()

In [None]:
# Ratio of the "ak8_pt" cutflow entry to the "all" entry for the 2B2Tau sample.
# NOTE(review): presumably the fraction of events surviving the AK8 pT preselection — confirm
# against utils.get_cutflow.
cf["ak8_pt"] / cf["all"]

In [None]:
# Ratio of the "ak8_pt" cutflow entry to the "all" entry for the 4B sample.
# NOTE(review): presumably the fraction of events surviving the AK8 pT preselection — confirm
# against utils.get_cutflow.
cf4b["ak8_pt"] / cf4b["all"]

In [None]:
# Fraction of bbtautau events whose index-1 fat jet has pT above 250
# (presumably the subleading jet — confirm the jet ordering in the skimmer).
(df["ak8FatJetPt"][1] > 250).mean()

In [None]:
# Fraction of 4b events whose index-1 fat jet has pT above 230
# (presumably the subleading jet — confirm the jet ordering in the skimmer).
(df4b["ak8FatJetPt"][1] > 230).mean()

In [None]:
# Among events flagged GenTauhh, the fraction whose index-1 fat jet has pT above 250.
gen_tauhh = df["GenTauhh"][0]
jet2_above_250 = df["ak8FatJetPt"][1] > 250
np.sum(jet2_above_250 * gen_tauhh) / np.sum(gen_tauhh)

In [None]:
# Of the events whose index-0 fat jet has pT above 250, the fraction whose index-1 fat jet
# is also above 230.
jet1_above_250 = df["ak8FatJetPt"][0] > 250
jet2_above_230 = df["ak8FatJetPt"][1] > 230
np.sum(jet2_above_230 * jet1_above_250) / np.sum(jet1_above_250)

In [None]:
# Per gen-level channel: fraction of events with the index-0 fat jet above 250, scaled by the
# "ak8_pt" / "all" cutflow ratio.
presel_ratio = cf["ak8_pt"] / cf["all"]
jet1_above_250 = df["ak8FatJetPt"][0] > 250

for cat in ("GenTauhh", "GenTauhmu", "GenTauhe"):
    in_channel = df[cat][0]
    print(np.sum(jet1_above_250 * in_channel) / np.sum(in_channel) * presel_ratio)

In [None]:
# Per gen-level channel: of the events with the index-0 fat jet above 250, the fraction whose
# index-1 fat jet is above 230.
jet1_above_250 = df["ak8FatJetPt"][0] > 250
jet2_above_230 = df["ak8FatJetPt"][1] > 230

for cat in ("GenTauhh", "GenTauhmu", "GenTauhe"):
    in_channel = df[cat][0]
    n_both = np.sum(jet2_above_230 * jet1_above_250 * in_channel)
    n_jet1 = np.sum(in_channel * jet1_above_250)
    print(n_both / n_jet1)

In [None]:
# Back-of-the-envelope yield for the pT > 250 selection.
# NOTE(review): every number below is hand-entered with no provenance in this notebook
# (presumably cross section x branching fractions and efficiencies read off the cells
# above) — confirm and document the sources.
xsecbr = 34.13 * 0.07 * 0.88
trigeff = 0.94

_first_term = 0.3 * 0.42
_second_term = 2 * (0.23 * 0.23)
p250eff = 0.086 * ((_first_term + _second_term) / 0.88)

for value in (p250eff, xsecbr * p250eff * trigeff):
    print(value)

In [None]:
# Same back-of-the-envelope yield for the (250, 230) selection; reuses `xsecbr` from the
# previous cell and rebinds `trigeff`.
# NOTE(review): hand-entered numbers with no provenance in this notebook — confirm sources.
trigeff = 0.92

_first_term = 0.38 * 0.42
_second_term = 2 * (0.23 * 0.3)
p230eff = 0.086 * ((_first_term + _second_term) / 0.88)

for value in (p230eff, xsecbr * p230eff * trigeff):
    print(value)

In [None]:
# Per-event weights: entry 0 of the "weight" column group.
weights = df["weight"][0]
# Generator-level Higgs bosons built by the project helper.
# NOTE(review): presumably indexed as (event, Higgs) with two Higgs per event, given the
# [:, 0] / [:, 1] indexing below — confirm against utils.make_vector.
higgs = utils.make_vector(df, name="GenHiggs")
# Invariant mass of the sum of the two Higgs entries.
mhh = (higgs[:, 0] + higgs[:, 1]).mass
# pT of the Higgs entries whose "GenHiggsChildren" value equals 5 / 15.
# NOTE(review): presumably PDG IDs, i.e. the H->bb and H->tautau legs — confirm.
hbbpt = higgs[df["GenHiggsChildren"] == 5].pt
httpt = higgs[df["GenHiggsChildren"] == 15].pt

In [None]:
# Entries whose "GenHiggsDecayMode" equals 1.
# NOTE(review): not used by any of the following cells shown here — confirm it is still needed.
bb_mask = df["GenHiggsDecayMode"].eq(1)

## Triggers

In [None]:
# Per-event trigger decisions on the bbtautau frame, grouped into the OR combinations that
# the efficiency studies below compare. Each mask is entry 0 of the named HLT column.
# Tau triggers Run 3 https://twiki.cern.ch/twiki/bin/view/CMSPublic/Run3TauHLT

# ParticleNet AK8 jet triggers (BB and TauTau variants) and their OR
pnetbb = df["HLT_AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35"][0]
pnettt = df["HLT_AK8PFJet230_SoftDropMass40_PFAK8ParticleNetTauTau0p30"][0]
pnet = pnetbb | pnettt

# single-tau trigger
singletau = df["HLT_LooseDeepTauPFTauHPS180_L2NN_eta2p1"][0]

# AK8 jet trigger without a ParticleNet requirement
pfjet = df["HLT_AK8PFJet425_SoftDropMass40"][0]

# OR of the quad-jet paths
quadjet = (
    df["HLT_QuadPFJet70_50_40_35_PFBTagParticleNet_2BTagSum0p65"][0]
    | df["HLT_QuadPFJet103_88_75_15_PFBTagDeepJet_1p3_VBF2"][0]
    | df["HLT_QuadPFJet103_88_75_15_DoublePFBTagDeepJet_1p3_7p7_VBF1"][0]
)

# OR of the di-tau paths
# NOTE(review): the "Iso" PFJet60 variant is commented out — confirm whether that is deliberate
ditau = (
    df["HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1"][0]
    # | df["HLT_DoubleMediumDeepTauIsoPFTauHPS30_L2NN_eta2p1_PFJet60"][0]
    | df["HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60"][0]
    | df["HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet75"][0]
)

# OR of the single-muon paths
singlemuon = df["HLT_IsoMu24"][0] | df["HLT_Mu50"][0]

# OR of the muon + tau cross-trigger paths
mutau = (
    df["HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu24_eta2p1_LooseDeepTauPFTauHPS180_eta2p1"][0]
    | df["HLT_IsoMu24_eta2p1_LooseDeepTauPFTauHPS30_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu24_eta2p1_MediumDeepTauPFTauHPS35_L2NN_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu24_eta2p1_MediumDeepTauPFTauHPS30_L2NN_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu24_eta2p1_MediumDeepTauPFTauHPS45_L2NN_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu20_eta2p1_TightChargedIsoPFTauHPS27_eta2p1_CrossL1"][0]
    | df["HLT_IsoMu20_eta2p1_TightChargedIsoPFTauHPS27_eta2p1_TightID_CrossL1"][0]
)

# OR of the electron and photon paths
egamma = (
    df["HLT_Ele30_WPTight_Gsf"][0]
    | df["HLT_Ele115_CaloIdVT_GsfTrkIdT"][0]
    | df["HLT_Ele50_CaloIdVT_GsfTrkIdT_PFJet165"][0]
    | df["HLT_Photon200"][0]
)

# OR of the electron + tau cross-trigger paths
etau = (
    df["HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1"][0]
    | df["HLT_Ele24_eta2p1_WPTight_Gsf_TightChargedIsoPFTauHPS30_eta2p1_CrossL1"][0]
)

# Trigger combinations to compare, keyed by category. Each category holds:
#   "mask":     gen-level channel selection (entry 0 of the GenTau* column),
#   "label":    LaTeX label used as the plot title,
#   "triggers": {legend label: per-event trigger decision}.
# A "<cat>_minus" category starts with the full OR for that channel, followed by the same OR
# with one component removed ("-X"). The plotting cell assigns colours by position and the
# table cell turns the "-X" keys into column names, so key order and spelling matter.
triggers_dict = {
    "tauhh": {
        "mask": df["GenTauhh"][0],
        "label": r"$\tau_h\tau_h$",
        "triggers": {
            "PNetBB": pnetbb,
            "PNetTauTau": pnettt,
            "PNetBB | TauTau": pnet,
            "PNetBB | TauTau | SingleTau | Di-tau": pnet | singletau | ditau,
            "PNetBB | TauTau | PFJet425 | Quad-jet": pnet | pfjet | quadjet,
            "PNetBB | TauTau | PFJet425 | Quad-jet | SingleTau": pnet | pfjet | quadjet | singletau,
            "PNetBB | TauTau | PFJet425 | Quad-jet | Di-tau": pnet | pfjet | quadjet | ditau,
            "PNetBB | TauTau | PFJet425 | SingleTau | Di-tau": pnet | pfjet | singletau | ditau,
            "All": pnet | pfjet | quadjet | singletau | ditau,
        },
    },
    "tauhh_minus": {
        "mask": df["GenTauhh"][0],
        "label": r"$\tau_h\tau_h$",
        "triggers": {
            # full OR first, then one component dropped at a time
            "PNetBB | TauTau | PFJet425 | Quad-jet | SingleTau | Di-tau": pnet
            | pfjet
            | quadjet
            | singletau
            | ditau,
            "-PNetBB": pnettt | pfjet | quadjet | singletau | ditau,
            "-PNetTauTau": pnetbb | pfjet | quadjet | singletau | ditau,
            "-PFJet425": pnet | quadjet | singletau | ditau,
            "-Quad-jet": pnet | pfjet | singletau | ditau,
            "-SingleTau": pnet | pfjet | quadjet | ditau,
            "-Di-tau": pnet | pfjet | quadjet | singletau,
        },
    },
    "tauhmu": {
        "mask": df["GenTauhmu"][0],
        "label": r"$\tau_h\mu$",
        "triggers": {
            "PNetBB": pnetbb,
            "PNetTauTau": pnettt,
            "PNetBB | TauTau": pnet,
            "Muon": singlemuon,
            "Mu-tau": mutau,
            "SingleTau": singletau,
            "Di-tau": ditau,
            # NOTE(review): disabled combinations kept from an earlier iteration — remove if obsolete
            # "Mu-tau | SingleTau | Di-tau": mutau | singletau | ditau,
            # "Muon | Mu-tau": singlemuon | mutau,
            # "PNetBB | TauTau | Muon": pnet | singlemuon,
            "PNetBB | TauTau | Muon | Mu-tau | SingleTau | Di-tau | PFJet425": pnet
            | singlemuon
            | mutau
            | singletau
            | ditau
            | pfjet,
        },
    },
    "tauhmu_minus": {
        "mask": df["GenTauhmu"][0],
        "label": r"$\tau_h\mu$",
        "triggers": {
            # full OR first, then one component dropped at a time
            "PNetBB | TauTau | Muon | Mu-tau | SingleTau | Di-tau | PFJet425": pnet
            | singlemuon
            | mutau
            | singletau
            | ditau
            | pfjet,
            "-PNetBB": pnettt | singlemuon | mutau | singletau | ditau | pfjet,
            "-PNetTauTau": pnetbb | singlemuon | mutau | singletau | ditau | pfjet,
            "-Muon": pnet | mutau | singletau | ditau | pfjet,
            "-Mu-tau": pnet | singlemuon | singletau | ditau | pfjet,
            "-SingleTau": pnet | singlemuon | mutau | ditau | pfjet,
            "-Di-tau": pnet | singlemuon | mutau | singletau | pfjet,
            "-PFJet425": pnet | singlemuon | mutau | singletau | ditau,
        },
    },
    "tauhe": {
        "mask": df["GenTauhe"][0],
        "label": r"$\tau_he$",
        "triggers": {
            "PNetBB": pnetbb,
            "PNetTauTau": pnettt,
            "PNetBB | TauTau": pnet,
            "EGamma": egamma,
            "e-tau": etau,
            "SingleTau": singletau,
            "Di-tau": ditau,
            "PNetBB | TauTau | EGamma | e-tau | SingleTau | Di-tau | PFJet425": pnet
            | egamma
            | etau
            | singletau
            | ditau
            | pfjet,
        },
    },
    "tauhe_minus": {
        "mask": df["GenTauhe"][0],
        "label": r"$\tau_he$",
        "triggers": {
            # full OR first, then one component dropped at a time
            "PNetBB | TauTau | EGamma | e-tau | SingleTau | Di-tau | PFJet425": pnet
            | egamma
            | etau
            | singletau
            | ditau
            | pfjet,
            "-PNetBB": pnettt | egamma | etau | singletau | ditau | pfjet,
            "-PNetTauTau": pnetbb | egamma | etau | singletau | ditau | pfjet,
            "-EGamma": pnet | etau | singletau | ditau | pfjet,
            "-e-tau": pnet | egamma | singletau | ditau | pfjet,
            "-SingleTau": pnet | egamma | etau | ditau | pfjet,
            "-Di-tau": pnet | egamma | etau | singletau | pfjet,
            "-PFJet425": pnet | egamma | etau | singletau | ditau,
        },
    },
}

In [None]:
# For every category in `triggers_dict` and every kinematic variable, draw the weighted
# preselection histogram with each trigger selection overlaid (top panel) and the
# triggered / preselection ratio per bin (bottom panel), then save the figure as a PDF.
plt.rcParams.update({"font.size": 14})

# (values, file-name stem, x-axis label, bin edges) for each variable to plot
plot_vars = [
    (mhh, "mhh", r"$m_{HH}$ [GeV]", np.linspace(250, 1500, 30)),
    (hbbpt, "hbbpt", r"Hbb $p_{T}$ [GeV]", np.linspace(230, 500, 20)),
    (httpt, "httpt", r"H$\tau\tau$ $p_{T}$ [GeV]", np.linspace(230, 500, 20)),
]

# tab10 palette without its first colour; one colour per trigger selection
colours = plt.cm.tab10.colors[1:]

for cat, vals in triggers_dict.items():
    # Look the entries up by key: unpacking vals.values() silently depends on the insertion
    # order of the inner dict. This is also invariant across variables, so do it once here.
    mask, label, triggers = vals["mask"], vals["label"], vals["triggers"]

    for kinvar, kinname, kinlabel, bins in plot_vars:
        fig, (ax, rax) = plt.subplots(
            2, 1, figsize=(12, 14), gridspec_kw=dict(height_ratios=[4, 1], hspace=0.07), sharex=True
        )

        hists = {"Preselection": np.histogram(kinvar[mask], bins=bins, weights=weights[mask])}
        ratios = {}

        hep.histplot(
            hists["Preselection"],
            yerr=False,
            label="Preselection",
            ax=ax,
        )

        for i, (key, trig) in enumerate(triggers.items()):
            # Wrap around the palette instead of zip()-truncating, so a selection is never
            # silently dropped when there are more selections than colours.
            c = colours[i % len(colours)]

            passed = mask & trig
            hists[key] = np.histogram(kinvar[passed], bins=bins, weights=weights[passed])

            # Empty preselection bins give 0/0; keep the resulting NaN values as before but
            # do not flood the output with RuntimeWarnings.
            with np.errstate(divide="ignore", invalid="ignore"):
                ratios[key] = hists[key][0] / hists["Preselection"][0]

            hep.histplot(
                hists[key],
                yerr=False,
                label=key,
                ax=ax,
                color=c,
            )

            hep.histplot(
                (ratios[key], bins),
                yerr=False,
                label=key,
                ax=rax,
                histtype="errorbar",
                color=c,
                linestyle="--",
            )

        ax.set_ylabel("Events [A.U.]")
        ax.legend()
        ax.set_title(label)
        ax.set_xlim(bins[0], bins[-1])
        ax.set_ylim(0)

        rax.grid(axis="y")
        rax.set_xlabel(kinlabel)
        rax.set_ylabel("Triggered / Preselection")

        # zoom the ratio panel for the "minus" categories, except for the mhh plots
        ylims = [0.5, 1] if (cat.endswith("minus") and kinname != "mhh") else [0, 1]
        rax.set_ylim(ylims)

        hep.cms.label(ax=ax, data=False, year=2022, com="13.6")

        plt.savefig(plot_dir / f"{kinname}_{cat}.pdf", bbox_inches="tight")
        plt.show()
        # release the figure explicitly; this loop creates one per category and variable
        plt.close(fig)

In [None]:
# Trigger-efficiency tables: for each channel, one row per boosted-jet selection and one
# column per trigger combination of the matching "<cat>_minus" entry ("All" = full OR,
# "-X" = full OR without X). Values are percentages; each table is also written to CSV.
cats = [cat for cat in triggers_dict.keys() if not cat.endswith("minus")]

jet1_pt = df["ak8FatJetPt"][0]
jet2_pt = df["ak8FatJetPt"][1]
boostedsels = {
    "1 boosted jet (> 250)": jet1_pt > 250,
    "2 boosted jets (> 250)": (jet1_pt > 250) & (jet2_pt > 250),
    # The first-jet cut must be 250 to match this row's label (it was 240) and the
    # 250 / 230 pair used by the other efficiency cells.
    "2 boosted jets (>250, >230)": (jet1_pt > 250) & (jet2_pt > 230),
    "2 boosted jets (> 300)": (jet1_pt > 300) & (jet2_pt > 300),
}

for cat in cats:
    trig_table = pd.DataFrame(index=list(boostedsels.keys()))
    mask = triggers_dict[cat]["mask"]

    for tkey, tsel in triggers_dict[f"{cat}_minus"]["triggers"].items():
        effs = []
        for bkey, sel in boostedsels.items():
            # unweighted fraction of channel events in this selection that fire the triggers
            eff = np.sum(mask & sel & tsel) / np.sum(mask & sel)
            effs.append(f"{eff * 100:.1f}")

        # the only key that does not start with "-" is the full OR
        ttkey = tkey.replace("- ", "-") if tkey.startswith("-") else "All"
        trig_table[ttkey] = effs

    trig_table.to_csv(plot_dir / f"trig_effs_{cat}.csv")
    print(trig_table)

In [None]:
# Trigger efficiency for events flagged GenTauhh: inclusive, then after requiring the
# index-1 fat jet to have pT > 300.
mask = triggers_dict["tauhh"]["mask"]

# triggers_dict["tauhh"]["triggers"] has no "ParticleNetBB | PFJet425 | Di-tau" entry
# (the lookup raised KeyError), so build the OR it names from the individual masks.
tauhh_trig = pnetbb | pfjet | ditau

# `twoboosted` is only assigned in a later cell, and from df4b; build the same cut on the
# bbtautau frame so that it lines up event-by-event with `mask`.
twoboosted_bbtt = df["ak8FatJetPt"][1] > 300

print(np.sum(tauhh_trig[mask]) / np.sum(mask))
print(np.sum((tauhh_trig * twoboosted_bbtt)[mask]) / np.sum(mask * twoboosted_bbtt))

In [None]:
# Trigger efficiency for events flagged GenTauhmu: inclusive, then after requiring the
# index-1 fat jet to have pT > 300.
mask = triggers_dict["tauhmu"]["mask"]

# triggers_dict["tauhmu"]["triggers"] has no "ParticleNetBB | IsoMu24 | Mu50 | Mu-tau+Jet"
# entry (the lookup raised KeyError), so build the OR from the individual masks.
# `singlemuon` is exactly IsoMu24 | Mu50.
# NOTE(review): `mutau` is the closest available stand-in for "Mu-tau+Jet" — confirm.
tauhmu_trig = pnetbb | singlemuon | mutau

# `twoboosted` is only assigned in a later cell, and from df4b; build the same cut on the
# bbtautau frame so that it lines up event-by-event with `mask`.
twoboosted_bbtt = df["ak8FatJetPt"][1] > 300

print(np.sum(tauhmu_trig[mask]) / np.sum(mask))
print(np.sum((tauhmu_trig * twoboosted_bbtt)[mask]) / np.sum(mask * twoboosted_bbtt))

In [None]:
# Trigger efficiency for events flagged GenTauhe: inclusive, then after requiring the
# index-1 fat jet to have pT > 300.
mask = triggers_dict["tauhe"]["mask"]

# triggers_dict["tauhe"]["triggers"] has no "ParticleNetBB | Electron | E-tau" entry
# (the lookup raised KeyError), so build the OR from the individual masks.
# NOTE(review): `egamma` also contains HLT_Photon200, so it is a superset of a pure
# "Electron" OR — confirm that this is the intended combination.
tauhe_trig = pnetbb | egamma | etau

# `twoboosted` is only assigned in a later cell, and from df4b; build the same cut on the
# bbtautau frame so that it lines up event-by-event with `mask`.
twoboosted_bbtt = df["ak8FatJetPt"][1] > 300

print(np.sum(tauhe_trig[mask]) / np.sum(mask))
print(np.sum((tauhe_trig * twoboosted_bbtt)[mask]) / np.sum(mask * twoboosted_bbtt))

## 4b triggers

In [None]:
# Inputs for the 4b trigger study.
# NOTE(review): `higgs`, `mhh` and `weights` were bound to the bbtautau frame earlier in the
# notebook and are rebound to the 4b frame here; cells that combine them with masks built
# from `df` give mismatched results if run after this one.
weights = df4b["weight"][0]

higgs = utils.make_vector(df4b, name="GenHiggs")
higgs1, higgs2 = higgs[:, 0], higgs[:, 1]
mhh = (higgs1 + higgs2).mass

# an event counts as triggered if either AK8 jet path fired
pnetbb_4b = df4b["HLT_AK8PFJet250_SoftDropMass40_PFAK8ParticleNetBB0p35"][0]
pfjet_4b = df4b["HLT_AK8PFJet425_SoftDropMass40"][0]
bbbb_triggered = pnetbb_4b | pfjet_4b

In [None]:
# 4b counterpart of the trigger-efficiency plots: for each kinematic variable, draw the
# weighted histogram of all events with the triggered subset overlaid (top panel) and the
# triggered / all ratio per bin (bottom panel), then save the figure as a PDF.
plt.rcParams.update({"font.size": 14})

# (values, file-name stem, x-axis label, bin edges) for each variable to plot
plot_vars = [
    (mhh, "mhh", r"$m_{HH}$ [GeV]", np.linspace(250, 1500, 30)),
    (higgs[:, 0].pt, "hbbpt1", r"H$_1$ $p_{T}$ [GeV]", np.linspace(230, 500, 20)),
    (higgs[:, 1].pt, "hbbpt2", r"H$_2$ $p_{T}$ [GeV]", np.linspace(230, 500, 20)),
]

cat = "bbbb"
(mask, label, triggers) = (
    # no channel split for 4b: select every event (built as bool directly, no float detour)
    np.ones(len(df4b), dtype=bool),
    "HH4b",
    {"ParticleNetBB | PFJet425 ": bbbb_triggered},
)

# tab10 palette without its first colour; one colour per trigger selection
colours = plt.cm.tab10.colors[1:]

for kinvar, kinname, kinlabel, bins in plot_vars:
    fig, (ax, rax) = plt.subplots(
        2, 1, figsize=(12, 14), gridspec_kw=dict(height_ratios=[4, 1], hspace=0.07), sharex=True
    )

    hists = {"Preselection": np.histogram(kinvar[mask], bins=bins, weights=weights[mask])}
    ratios = {}

    hep.histplot(
        hists["Preselection"],
        yerr=False,
        label="Preselection",
        ax=ax,
    )

    for i, (key, trig) in enumerate(triggers.items()):
        # wrap around the palette so a selection is never silently dropped
        c = colours[i % len(colours)]

        passed = mask & trig
        hists[key] = np.histogram(kinvar[passed], bins=bins, weights=weights[passed])

        # Empty bins give 0/0; keep the resulting NaN values as before but do not flood the
        # output with RuntimeWarnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            ratios[key] = hists[key][0] / hists["Preselection"][0]

        hep.histplot(
            hists[key],
            yerr=False,
            label=key,
            ax=ax,
            color=c,
        )

        hep.histplot(
            (ratios[key], bins),
            yerr=False,
            label=key,
            ax=rax,
            histtype="errorbar",
            color=c,
            linestyle="--",
        )

    ax.set_ylabel("Events [A.U.]")
    ax.legend()
    ax.set_title(label)
    ax.set_xlim(bins[0], bins[-1])
    ax.set_ylim(0)

    rax.grid(axis="y")
    rax.set_xlabel(kinlabel)
    rax.set_ylabel("Triggered / Preselection")

    # `cat` is fixed to "bbbb" above, so the zoomed range used for "*_minus" categories can
    # never apply here; the ratio panel always spans [0, 1].
    rax.set_ylim([0, 1])

    hep.cms.label(ax=ax, data=False, year=2022, com="13.6")

    plt.savefig(plot_dir / f"{kinname}_{cat}.pdf", bbox_inches="tight")
    plt.show()
    # release the figure explicitly; the loop creates one per variable
    plt.close(fig)

In [None]:
# Stand-alone m_HH trigger-efficiency plot for the 4b sample. Relies on `mhh`, `weights`,
# `mask`, `triggers`, `label` and `cat` as left behind by the previous 4b cells.
# NOTE(review): this saves to the same "mhh_{cat}.pdf" path as the mhh iteration of the loop
# in the previous cell and therefore overwrites that figure — confirm which version is wanted.
plt.rcParams.update({"font.size": 14})

bins = np.linspace(250, 1500, 30)
colours = plt.cm.tab10.colors[1:]

fig, (ax, rax) = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=True,
    figsize=(12, 14),
    gridspec_kw={"height_ratios": [4, 1], "hspace": 0.07},
)

# top panel: all events
presel_hist = np.histogram(mhh[mask], bins=bins, weights=weights[mask])
hists = {"Preselection": presel_hist}
ratios = {}
hep.histplot(presel_hist, yerr=False, label="Preselection", ax=ax)

# one overlay (top) and one ratio series (bottom) per trigger selection
for key, c in zip(triggers, colours):
    print(key)
    passed = mask & triggers[key]
    hists[key] = np.histogram(mhh[passed], bins=bins, weights=weights[passed])
    ratios[key] = hists[key][0] / presel_hist[0]

    hep.histplot(hists[key], yerr=False, label=key, ax=ax, color=c)
    hep.histplot(
        (ratios[key], bins),
        yerr=False,
        label=key,
        ax=rax,
        histtype="errorbar",
        color=c,
        capsize=4,
    )

ax.set_ylabel("Events [A.U.]")
ax.legend()
ax.set_title(label)

rax.legend()
rax.grid(axis="y")
rax.set_xlabel(r"$m_{HH}$ [GeV]")
rax.set_ylabel("Triggered / Preselection")
rax.set_ylim(0, 1)

hep.cms.label(ax=ax, data=False, year=2022, com="13.6")

plt.savefig(plot_dir / f"mhh_{cat}.pdf", bbox_inches="tight")
plt.show()

In [None]:
# 4b trigger efficiency: inclusive, then for events whose index-1 fat jet has pT > 300.
twoboosted = df4b["ak8FatJetPt"][1] > 300

inclusive_eff = np.mean(bbbb_triggered)
boosted_eff = np.sum(bbbb_triggered * twoboosted) / np.sum(twoboosted)

for eff in (inclusive_eff, boosted_eff):
    print(eff)

In [None]:
# Sum of the three ParT tau-tau scores per fat jet, histogrammed for the index-1 jet.
score_tauhtauh = df["ak8FatJetParTXtauhtauh"]
score_tauhtaue = df["ak8FatJetParTXtauhtaue"]
score_tauhtaum = df["ak8FatJetParTXtauhtaum"]
ptaus = score_tauhtauh + score_tauhtaue + score_tauhtaum

plt.hist(ptaus[1], bins=np.linspace(0, 1, 100))

## Mass

In [None]:
# Weighted fat-jet mass distributions per gen-level channel: SoftDrop (solid) and
# PNetLegacy (dashed) mass for fat jets 1-3, one figure per channel.
cats = [cat for cat in triggers_dict.keys() if not cat.endswith("minus")]
bins = np.linspace(0, 200, 50)

# Take the weights from `df` itself. The global `weights` has been rebound to the 4b sample
# by the 4b trigger cells, so it no longer matches the masks and masses taken from `df`.
bbtt_weights = df["weight"][0]

for cat in cats:
    mask = triggers_dict[cat]["mask"]
    label = triggers_dict[cat]["label"]
    cat_weights = bbtt_weights[mask]

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))

    for mkey, style, mlabel in zip(
        ["ak8FatJetMsd", "ak8FatJetPNetmassLegacy"], ["-", "--"], ["SoftDrop", "PNetLegacy"]
    ):
        masses = df[mkey][mask]
        # same colour per jet index for both mass definitions; the line style tells them apart
        for i in range(3):
            ax.hist(
                masses[i].values,
                bins,
                histtype="step",
                weights=cat_weights,
                label=f"FatJet {i + 1} {mlabel}",
                linewidth=2,
                linestyle=style,
                color=plt.cm.tab10.colors[i],
            )

    ax.set_xlabel("Mass [GeV]")
    ax.set_ylabel("Weighted events")
    ax.set_title(label)
    ax.set_xlim(bins[0], bins[-1])
    ax.legend()
    hep.cms.label(ax=ax, data=False, year=2022, com="13.6", fontsize=16)