In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker
import numpy as np

import uproot
import awkward as ak
from coffea import nanoevents

from coffea.nanoevents.methods.base import NanoEventsArray
from coffea.analysis_tools import Weights, PackedSelection
from coffea.nanoevents.methods import nanoaod
from coffea.nanoevents.methods import vector
from coffea.lookup_tools.dense_lookup import dense_lookup

from HHbbVV.processors.utils import pad_val

# Apply the CMS plotting style from mplhep (set through both matplotlib and mplhep).
plt.style.use(hep.style.CMS)
hep.style.use("CMS")
# Tick formatter using math text; scientific notation kicks in outside 10^-3 .. 10^3.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))
plt.rcParams.update({"font.size": 24})

import warnings

# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and numerical warnings — consider a narrower filter.
warnings.filterwarnings("ignore")

In [None]:
from datetime import datetime
from pathlib import Path

# Base directory (relative to this notebook) from which all other paths are built.
MAIN_DIR = Path("../../../")
samples_dir = MAIN_DIR / "../data/skimmer/24Mar5AllYears"
# samples_dir = "/ceph/cms/store/user/annava/projects/HHbbVV/24Mar5AllYears"
# NOTE(review): the NanoAOD files opened further down are 2018 samples — confirm
# that "2016" is the intended value here.
year = "2016"

# date = "24Mar6"
# Plots go to a per-day directory (YYYYMMDD), created on first use.
date = datetime.now().strftime("%Y%m%d")
plot_dir = MAIN_DIR / f"plots/Kinematics/{date}/"
plot_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# PDG Monte Carlo particle IDs used to pick out generator-level particles.
Z_PDGID = 23
W_PDGID = 24
HIGGS_PDGID = 25
b_PDGID = 5
# GenPart status flags required of the generator-level Higgs / V bosons selected below.
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]

In [None]:
# A jet counts as "matched" when at least one gen quark lies within this deltaR of it.
MATCHING_DR: float = 0.4  # deltaR for matching jets to gen particles

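Look at a single VBF HH signal NanoAOD file. The path that is active below is the $C_{2V} = 0$ sample; the SM ($C_{2V} = 1$) file is kept as a commented-out alternative.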

In [None]:
# Open one signal file remotely (root:// URL) and build the event array with the NanoAOD schema.
# The active path is the CV_1_C2V_0_C3_1 sample; the commented-out path is the C2V_1 alternative.
events = nanoevents.NanoEventsFactory.from_root(
    # "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2018/HH/VBF_HHTobbVV_CV_1_C2V__C3_1_TuneCP5_13TeV-madgraph-pythia8/VBF_HHTobbVV_CV_1_C2V_1_C3_1/220808_150149/0000/nano_mc2018_1-1.root",
    "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2018/HH/VBF_HHTobbVV_CV_1_C2V_0_C3_1_TuneCP5_13TeV-madgraph-pythia8/VBF_HHTobbVV_CV_1_C2V_0_C3_1/220808_150000/0000/nano_mc2018_1-1.root",
    schemaclass=nanoevents.NanoAODSchema,
).events()

Get generator-level Higgs and Vs

In [None]:
# Generator-level Higgs bosons carrying all of GEN_FLAGS
higgs = events.GenPart[
    events.GenPart.hasFlags(GEN_FLAGS) & (abs(events.GenPart.pdgId) == HIGGS_PDGID)
]

higgs_children = higgs.children

# classify each Higgs by its decay products: b quarks vs. W/Z bosons
is_bb = abs(higgs_children.pdgId) == b_PDGID
is_VV = (abs(higgs_children.pdgId) == W_PDGID) | (abs(higgs_children.pdgId) == Z_PDGID)

# keep exactly one H->bb and one H->VV entry per event (None when there is none)
Hbb = ak.pad_none(higgs[ak.any(is_bb, axis=2)], 1, axis=1)[:, 0]
HVV = ak.pad_none(higgs[ak.any(is_VV, axis=2)], 1, axis=1)[:, 0]

# generator-level W bosons carrying all of GEN_FLAGS (Z bosons are not included)
vs = events.GenPart[events.GenPart.hasFlags(GEN_FLAGS) & (abs(events.GenPart.pdgId) == W_PDGID)]

# the VBF outgoing quarks are taken from positions 4 and 5 of the isHardProcess particles
gen_quarks = events.GenPart[events.GenPart.hasFlags(["isHardProcess"])][:, 4:6]

# AK8 Selections

In [None]:
# ak8 jet preselection
# Reference AK8 preselection thresholds.
# NOTE(review): this dict is not read by any of the cells visible in this part of
# the notebook; the fat-jet cuts applied below are hard-coded — confirm they agree.
preselection = {  # noqa: RUF012
    "pt": 300.0,
    "eta": 2.4,
    "VVmsd": 50,
    # "VVparticleNet_mass": [50, 250],
    # "bbparticleNet_mass": [92.5, 162.5],
    "bbparticleNet_mass": 50,
    "VVparticleNet_mass": 50,
    "bbFatJetParticleNetMD_Txbb": 0.8,
    "jetId": 2,  # tight ID bit
    "DijetMass": 800,  # TODO
    # "nGoodElectrons": 0,
}

In [None]:
num_jets = 2

# AK8 selections: tight-ID fat jets with pT > 300 and |eta| <= 2.4,
# padded with None so that every event exposes at least two entries
fatjets = events.FatJet

fatjets = ak.pad_none(
    fatjets[fatjets.isTight & (fatjets.pt > 300) & (np.abs(fatjets.eta) <= 2.4)], 2, axis=1
)

# ParticleNet mass-decorrelated Xbb-vs-QCD score: Xbb / (Xbb + QCD)
txbb = pad_val(
    fatjets.particleNetMD_Xbb / (fatjets.particleNetMD_Xbb + fatjets.particleNetMD_QCD),
    num_jets,
    axis=1,
)

# bb / VV assignment: of the first two fat jets, the one with the higher Txbb is
# the bb candidate (column 0 wins ties); bb_mask has one boolean column per jet
bb_mask = txbb[:, 0] >= txbb[:, 1]
bb_mask = np.stack((bb_mask, ~bb_mask)).T

In [None]:
# Leptons
# Leptons: loose cut-based electrons (pT > 5) and loose-ID muons (pT > 7)
electrons = events.Electron
electrons = electrons[(electrons.cutBased >= electrons.LOOSE) & (electrons.pt > 5)]

muons = events.Muon
muons = muons[muons.looseId & (muons.pt > 7)]

# Event selection on the two AK8 jets; events where a cut evaluates to None
# (e.g. a missing second fat jet) are treated as failing
sel = ak.fill_none(
    (txbb[bb_mask] > 0.97)
    & (fatjets.particleNet_H4qvsQCD[~bb_mask] > 0.6)
    & (fatjets.pt[:, 0] > 500)
    & (fatjets.pt[:, 1] > 400)
    & (np.abs(fatjets[:, 0].delta_phi(fatjets[:, 1])) > 2.6)
    & (np.abs(fatjets[:, 0].eta - fatjets[:, 1].eta) < 2.0),
    False,
)

In [None]:
jets_all = events.Jet

# Only consider events with two true VBF jets
# drs holds, per selected event, the deltaR between every AK4 jet and each gen quark
drs = jets_all[sel].metric_table(gen_quarks[sel])
# per-jet flag: True when at least one gen quark lies within MATCHING_DR of the jet
matched = ak.any(drs < MATCHING_DR, axis=2)
# select events with two true VBF jets
# (exactly two matched jets; nothing here prevents both from matching the same quark)
two_vbf = ak.sum(matched, axis=1) == 2
jets = jets_all[sel][two_vbf]

# NOTE: the collections below are overwritten with their filtered versions, so this
# cell is not idempotent — re-run the cells that define them before running it again.
fatjets = fatjets[sel][two_vbf]
bb_mask = bb_mask[sel][two_vbf]
electrons = electrons[sel][two_vbf]
muons = muons[sel][two_vbf]
gen_quarks = gen_quarks[sel][two_vbf]
Hbb = Hbb[sel][two_vbf]
HVV = HVV[sel][two_vbf]

# Jets Kinematics

In [None]:
# Jets matched to a gen quark, restricted to the events kept by `two_vbf`.
# NOTE: `matched` must be the per-jet mask computed together with `two_vbf`; a later
# cell reassigns `matched`, so re-running this cell out of order will mis-index.
true_vbf_jets = jets[matched[two_vbf]]

# Sanity check
assert ak.all(ak.count(true_vbf_jets.pt, axis=1) == 2), "Not all events have two true VBF jets"

In [None]:
# mass
# Mass spectrum of the truth-matched VBF jets
fig, ax = plt.subplots(figsize=(10, 10))
ax.hist(ak.flatten(true_vbf_jets.mass), bins=np.arange(0, 50, 1), histtype="step")
ax.set_xlabel("$m_j$ (GeV)")
ax.set_ylabel("Jets")
ax.set_title("True VBF Jets")
fig.savefig(plot_dir / "true_vbf_jet_mass.pdf", bbox_inches="tight")

In [None]:
# pT spectrum of the truth-matched VBF jets
fig, ax = plt.subplots(figsize=(10, 10))
ax.hist(ak.flatten(true_vbf_jets.pt), bins=np.arange(0, 600, 20), histtype="step")
ax.set_xlabel("$p_T$ (GeV)")
ax.set_ylabel("Jets")
ax.set_title("True VBF Jets")
fig.savefig(plot_dir / "true_vbf_jet_pt.pdf", bbox_inches="tight")
plt.show()

In [None]:
# |eta| of the truth-matched VBF jets
plt.figure(figsize=(10, 10))
plt.hist(ak.flatten(np.abs(true_vbf_jets.eta)), bins=np.arange(0, 6, 0.5), histtype="step")
# raw string: "\e" is not a valid Python escape sequence
plt.xlabel(r"$|\eta_j|$")
plt.ylabel("Jets")
plt.title("True VBF Jets")
plt.savefig(plot_dir / "true_vbf_jet_eta.pdf", bbox_inches="tight")

In [None]:
# eta_jj: absolute pseudorapidity separation of the two truth-matched VBF jets
plt.figure(figsize=(10, 10))
plt.hist(
    np.abs(true_vbf_jets[:, 0].eta - true_vbf_jets[:, 1].eta),
    bins=np.arange(0, 12, 0.5),
    histtype="step",
)
# raw string: "\e" is not a valid Python escape sequence
plt.xlabel(r"$\eta_{jj}$")
plt.ylabel("Events")
plt.title("True VBF Jets")
plt.savefig(plot_dir / "true_vbf_jet_eta_jj.pdf", bbox_inches="tight")
plt.show()

In [None]:
# dR(j, Hbb): one entry per truth-matched VBF jet, measured to the gen-level H->bb
plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(true_vbf_jets.delta_r(Hbb)),
    bins=np.arange(0, 8, 0.5),
    histtype="step",
)
# raw string: "\D" is not a valid Python escape sequence
plt.xlabel(r"$\Delta R(j, Hbb)$")
plt.ylabel("Events")
plt.title("True VBF Jets")
plt.savefig(plot_dir / "true_vbf_jet_dr_Hbb.pdf", bbox_inches="tight")
plt.show()

In [None]:
# dR(j, HVV): one entry per truth-matched VBF jet, measured to the gen-level H->VV
plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(true_vbf_jets.delta_r(HVV)),
    bins=np.arange(0, 8, 0.5),
    histtype="step",
)
# raw string: "\D" is not a valid Python escape sequence
plt.xlabel(r"$\Delta R(j, HVV)$")
plt.ylabel("Events")
plt.title("True VBF Jets")
plt.savefig(plot_dir / "true_vbf_jet_dr_HVV.pdf", bbox_inches="tight")
plt.show()

# AK4 Selections

In [None]:
from typing import Dict, Tuple, Union


def events_ak4_selected(
    ak4_jet_selection: Dict[str, Union[float, str]],
    jets: ak.Array,
    fatjets: ak.Array,
    bb_mask: ak.Array,
    electrons: ak.Array,
    muons: ak.Array,
    num_jets: int = 2,
) -> Tuple[ak.Array, ak.Array]:
    """
    Select VBF-candidate AK4 jets using kinematic cuts, jet/pileup ID and
    separation from the AK8 jets and from leptons.

    Keys of `ak4_jet_selection` read by this function:
        - pt_min, pt_max: inclusive bounds on the jet pT
        - eta_min, eta_max: inclusive bounds on |eta|
        - dR_fatjetbb: minimum deltaR to the bb-candidate fat jet
        - dR_fatjetVV: minimum deltaR to the VV-candidate fat jet

    The "jetId" and "puId" keys are NOT read: the tight jet ID (`jets.isTight`)
    and the pileup-ID requirement (`puId` bit 2 set, waived for pT > 50) are
    hard-coded, as is the 0.4 separation from every electron and muon.

    Args:
        ak4_jet_selection: thresholds, see above.
        jets: AK4 jets, one list per event.
        fatjets: AK8 jets; padded / clipped to `num_jets` per event internally.
        bb_mask: boolean mask over the `num_jets` fat jets marking the bb
            candidate; its negation marks the VV candidate.
        electrons, muons: leptons the jets must stay away from.
        num_jets: number of fat jets considered per event.

    Returns:
        (jets passing the selection, per-jet boolean mask that was applied)
    """
    cuts = ak4_jet_selection

    # bb / VV candidate fat jets; pad so that `bb_mask` can index every event
    padded_fatjets = ak.pad_none(fatjets, num_jets, axis=1, clip=True)
    bb_fatjet = padded_fatjets[bb_mask]
    vv_fatjet = padded_fatjets[~bb_mask]

    in_pt_window = (jets.pt >= cuts["pt_min"]) & (jets.pt <= cuts["pt_max"])
    in_eta_window = (np.abs(jets.eta) <= cuts["eta_max"]) & (
        np.abs(jets.eta) >= cuts["eta_min"]
    )
    # pileup ID is only required for jets with pT <= 50
    passes_pileup_id = (jets.pt > 50) | ((jets.puId & 2) == 2)
    away_from_fatjets = (bb_fatjet.delta_r(jets) > cuts["dR_fatjetbb"]) & (
        vv_fatjet.delta_r(jets) > cuts["dR_fatjetVV"]
    )
    # a jet must be farther than 0.4 from every electron and every muon
    away_from_leptons = ak.all(jets.metric_table(electrons) > 0.4, axis=2) & ak.all(
        jets.metric_table(muons) > 0.4, axis=2
    )

    vbf_jet_mask = (
        jets.isTight
        & in_pt_window
        & in_eta_window
        & passes_pileup_id
        & away_from_fatjets
        & away_from_leptons
    )

    return jets[vbf_jet_mask], vbf_jet_mask

In [None]:
# Updated ("new") AK4 VBF-jet selection; trailing comments give the previous values.
# The "jetId" / "puId" entries are not read by events_ak4_selected, which
# hard-codes those requirements.
ak4_jet_selection = {  # noqa: RUF012
    "pt_min": 15,  # was 25
    "pt_max": 640,  # was infty
    "eta_min": 0.25,  # was 0
    "eta_max": 4.9,  # was 4.7
    "jetId": "tight",
    "puId": "medium",
    "dR_fatjetbb": 1.0,  # was 1.2
    "dR_fatjetVV": 0.75,  # was 0.8
}

# jets passing the new selection, plus the per-jet mask over `jets`
jets_ak4_selected, vbf_jet_mask = events_ak4_selected(
    ak4_jet_selection=ak4_jet_selection,
    jets=jets,
    fatjets=fatjets,
    bb_mask=bb_mask,
    electrons=electrons,
    muons=muons,
)

In [None]:
# Multiplicity of jets passing the AK4 selection, one entry per event
passing_per_event = ak.count(jets_ak4_selected.pt, axis=1)
fig, ax = plt.subplots(figsize=(10, 10))
ax.hist(passing_per_event, bins=np.arange(0, 9), histtype="step")
ax.set_title("Number of VBF-Tagged Jets per Event")
ax.set_xlabel("Jets")
ax.set_ylabel("Events")
fig.savefig(plot_dir / "num_passing_vbf_jets.pdf", bbox_inches="tight")
plt.show()

## Comparison with an older cut

In [None]:
# Previous ("old") AK4 VBF-jet selection, kept for comparison with the new one.
old_ak4_jet_selection = {  # noqa: RUF012
    "pt_min": 25,
    "pt_max": np.inf,
    "eta_min": 0,
    "eta_max": 4.7,
    "jetId": "tight",
    "puId": "medium",
    "dR_fatjetbb": 1.2,
    "dR_fatjetVV": 0.8,
}

# same inputs as for the new selection; only the thresholds differ
old_jets_ak4_selected, old_vbf_jet_mask = events_ak4_selected(
    ak4_jet_selection=old_ak4_jet_selection,
    jets=jets,
    fatjets=fatjets,
    bb_mask=bb_mask,
    electrons=electrons,
    muons=muons,
)

In [None]:
def get_matching_efficiency(gen_quarks, vbf_jets, matching_dr=0.4, verbose=False):
    """
    Fraction of events whose first two jets in `vbf_jets` each have at least
    one gen quark within `matching_dr`.

    Args:
        gen_quarks: generator-level quarks, one list per event.
        vbf_jets: candidate VBF jets, one list per event; only the first two
            per event are considered.
        matching_dr: maximum deltaR for a jet-quark pair to count as matched.
        verbose: if True, also print the result.

    Returns:
        The mean over events of the "both jets matched" flag.

    NOTE(review): events with fewer than two jets are padded with None; how
    those None entries propagate through the any/all reductions decides whether
    such events count as matched — confirm this is the intended treatment.
    """
    first_two = ak.pad_none(vbf_jets, 2, axis=1)[:, :2]
    within_dr = first_two.metric_table(gen_quarks) < matching_dr
    # TODO: add overlap removal?
    jet_is_matched = np.any(within_dr, axis=2)
    all_jets_matched = np.all(jet_is_matched, axis=1)
    matching_fraction = np.mean(all_jets_matched)
    if verbose:
        print(f"Matching efficiency: {matching_fraction}")
    return matching_fraction


# Compare how often the two leading selected jets are truth-matched under each selection.
matching_efficiency_new = get_matching_efficiency(gen_quarks, jets_ak4_selected)
matching_efficiency_old = get_matching_efficiency(gen_quarks, old_jets_ak4_selected)

print(f"Matching efficiency in new selection: {matching_efficiency_new}")
print(f"Matching efficiency in old selection: {matching_efficiency_old}")

In [None]:
def get_cut_efficiency(vbf_jets, threshold=3):
    """
    Fraction of events in which the first two jets of `vbf_jets` are separated
    by more than `threshold` in |delta eta|.

    Args:
        vbf_jets: candidate VBF jets, one list per event.
        threshold: lower bound on |eta_0 - eta_1|.

    Returns:
        The mean over events of the pass/fail flag.

    NOTE(review): events with fewer than two jets get their missing eta filled
    by `pad_val`; the fill value is not visible here, and it determines whether
    such events pass the cut — verify against `pad_val`.
    """
    eta_pair = pad_val(vbf_jets.eta, 2, axis=1)
    abs_delta_eta = np.abs(eta_pair[:, 0] - eta_pair[:, 1])
    return np.mean(abs_delta_eta > threshold)


# Fraction of signal events passing the default |delta eta| > 3 cut under each selection.
cut_efficiency_new = get_cut_efficiency(jets_ak4_selected)
cut_efficiency_old = get_cut_efficiency(old_jets_ak4_selected)

print(f"Signal retain rate in new selection: {cut_efficiency_new}")
print(f"Signal retain rate in old selection: {cut_efficiency_old}")

## Effects on Background

In [None]:
# Open one QCD background file remotely and build its event array with the NanoAOD schema.
background = nanoevents.NanoEventsFactory.from_root(
    # a random QCD sample
    "root://cmseos.fnal.gov////store/user/lpcpfnano/cmantill/v2_3/2018/QCD/QCD_HT1500to2000_TuneCP5_PSWeights_13TeV-madgraph-pythia8/QCD_HT1500to2000_PSWeights_madgraph/220808_163124/0000/nano_mc2018_1-1.root",
    schemaclass=nanoevents.NanoAODSchema,
).events()

In [None]:
# This cell repeats the signal-side object definitions and event selection on the
# background sample (all names carry a `_bkg` suffix).
# NOTE(review): the generator-level Higgs / W / quark lookups below are copied from
# the signal cells; none of them feeds the retain rate printed at the end of this
# cell, and a QCD sample presumably has no Higgs or VBF quarks — confirm they are needed.
higgs_bkg = background.GenPart[
    (abs(background.GenPart.pdgId) == HIGGS_PDGID) * background.GenPart.hasFlags(GEN_FLAGS)
]

higgs_bkg_children = higgs_bkg.children

# finding bb and VV children
is_bb_bkg = abs(higgs_bkg_children.pdgId) == b_PDGID
is_VV_bkg = (abs(higgs_bkg_children.pdgId) == W_PDGID) + (abs(higgs_bkg_children.pdgId) == Z_PDGID)

Hbb_bkg = higgs_bkg[ak.any(is_bb_bkg, axis=2)]
HVV_bkg = higgs_bkg[ak.any(is_VV_bkg, axis=2)]

# make sure we're only getting one Higgs
Hbb_bkg = ak.pad_none(Hbb_bkg, 1, axis=1)[:, 0]
HVV_bkg = ak.pad_none(HVV_bkg, 1, axis=1)[:, 0]

vs_bkg = background.GenPart[
    ((abs(background.GenPart.pdgId) == 24)) * background.GenPart.hasFlags(GEN_FLAGS)
]

# hard-process particles at positions 4 and 5 (the VBF quark positions used for signal)
gen_quarks_bkg = background.GenPart[background.GenPart.hasFlags(["isHardProcess"])][:, 4:6]

num_jets_bkg = 2
fatjets_bkg = background.FatJet

# same AK8 selection as for signal: tight ID, pT > 300, |eta| <= 2.4, padded to two jets
fatjets_bkg = ak.pad_none(
    fatjets_bkg[(fatjets_bkg.pt > 300) * (fatjets_bkg.isTight) * (np.abs(fatjets_bkg.eta) <= 2.4)],
    2,
    axis=1,
)

# particlenet xbb vs qcd
txbb_bkg = pad_val(
    fatjets_bkg.particleNetMD_Xbb / (fatjets_bkg.particleNetMD_QCD + fatjets_bkg.particleNetMD_Xbb),
    num_jets_bkg,
    axis=1,
)

# bb VV assignment
bb_mask_bkg = txbb_bkg[:, 0] >= txbb_bkg[:, 1]
bb_mask_bkg = np.stack((bb_mask_bkg, ~bb_mask_bkg)).T

# Leptons
electrons_bkg = background.Electron
electrons_bkg = electrons_bkg[
    (electrons_bkg.pt > 5) & (electrons_bkg.cutBased >= electrons_bkg.LOOSE)
]

muons_bkg = background.Muon
muons_bkg = muons_bkg[(muons_bkg.pt > 7) & (muons_bkg.looseId)]

# same event-level AK8 selection as for signal; None results count as failing
sel_bkg = ak.fill_none(
    (
        (txbb_bkg[bb_mask_bkg] > 0.97)
        * (fatjets_bkg.particleNet_H4qvsQCD[~bb_mask_bkg] > 0.6)
        * (fatjets_bkg.pt[:, 0] > 500)
        * (fatjets_bkg.pt[:, 1] > 400)
        * (np.abs(fatjets_bkg[:, 0].delta_phi(fatjets_bkg[:, 1])) > 2.6)
        * (np.abs(fatjets_bkg[:, 0].eta - fatjets_bkg[:, 1].eta) < 2.0)
    ),
    False,
)

jets_bkg = background.Jet

# apply the new AK4 selection to the background events passing the AK8 selection
# (no truth-matching requirement here, unlike for the signal)
bkg_ak4_selected, _ = events_ak4_selected(
    ak4_jet_selection=ak4_jet_selection,
    jets=jets_bkg[sel_bkg],
    fatjets=fatjets_bkg[sel_bkg],
    bb_mask=bb_mask_bkg[sel_bkg],
    electrons=electrons_bkg[sel_bkg],
    muons=muons_bkg[sel_bkg],
)

print(f"Background retain rate in new selection: {get_cut_efficiency(bkg_ak4_selected)}")

# Selection analysis
- We define true VBF jets as jets that are matched to the generator-level VBF quarks (within $\Delta R < 0.4$).
- We define fake VBF jets as jets that are not matched to any generator-level VBF quark.

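We are interested in the following categories. Here the "$p_\mathrm{T}$ selection" means keeping only the first two jets that pass the AK4 selection; any further jets that pass are dropped.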
- `events_two_selected`: Events with 2 True VBF-tagged jets that pass the AK4 and $p_\mathrm{T}$ selection
- `events_one_selected_one_ak4unselected`: Events with 1 True VBF-tagged jet and 1 VBF jet that does not pass the AK4 selection.
- `events_one_selected_one_ptunselected`: Events with 1 True VBF-tagged jet and 1 VBF jet that does not pass the $p_\mathrm{T}$ selection.
- `events_zero_selected`: Events in which neither true VBF jet passes both the AK4 and $p_\mathrm{T}$ selection. There are three subcategories.
  - `events_zero_selected_two_ak4unselected`: Events in which both true VBF jets fail the AK4 selection.
  - `events_zero_selected_two_ptunselected`: Events in which both true VBF jets fail the $p_\mathrm{T}$ selection.
  - `events_zero_selected_one_ak4unselected_one_ptunselected`: Events in which one true VBF jet fails the AK4 selection and the other fails the $p_\mathrm{T}$ selection.

In [None]:
# true jets
# NOTE: this reassigns `drs` and `matched`, now computed on the already-filtered
# `jets` / `gen_quarks`; earlier cells that index `matched` with `two_vbf` must not
# be re-run after this cell without recomputing the original mask first.
drs = jets.metric_table(gen_quarks)
# per-jet flag: True when at least one gen quark lies within MATCHING_DR of the jet
matched = ak.any(drs < MATCHING_DR, axis=2)

In [None]:
# Split jets by selection outcome and truth matching. `[:, :2]` keeps the first two
# jets passing the AK4 mask (the "pT selection"); `[:, 2:]` are passing jets beyond those.
# selected VBF jets
vbf_jets_selected_ak4pt = jets[vbf_jet_mask][:, :2][matched[vbf_jet_mask][:, :2]]
# VBF jets that were not selected by AK4 selection
vbf_jets_unselected_ak4 = jets[~vbf_jet_mask][matched[~vbf_jet_mask]]
# VBF jets that were selected by AK4 selection but unselected by pT selection
vbf_jets_unselected_pt = jets[vbf_jet_mask][:, 2:][matched[vbf_jet_mask][:, 2:]]
# Fake VBF jets that were selected
fake_jets_selected = jets[vbf_jet_mask][:, :2][~matched[vbf_jet_mask][:, :2]]

In [None]:
# Generate masks for different categories of events
# Each mask is per event and is built from how many true VBF jets fall in each jet category.
mask_events_two_selected = ak.count(vbf_jets_selected_ak4pt.pt, axis=1) == 2
mask_events_one_selected_one_ak4unselected = (ak.count(vbf_jets_selected_ak4pt.pt, axis=1) == 1) & (
    ak.count(vbf_jets_unselected_ak4.pt, axis=1) == 1
)
mask_events_one_selected_one_ptunselected = (ak.count(vbf_jets_selected_ak4pt.pt, axis=1) == 1) & (
    ak.count(vbf_jets_unselected_pt.pt, axis=1) == 1
)
mask_events_zero_selected = ak.count(vbf_jets_selected_ak4pt.pt, axis=1) == 0
# subcategories of mask_events_zero_selected
# (these do not test the selected count explicitly; the sanity checks in the
# statistics cell verify that they add up to the zero-selected total)
mask_events_zero_selected_two_ak4unselected = ak.count(vbf_jets_unselected_ak4.pt, axis=1) == 2
mask_events_zero_selected_two_ptunselected = ak.count(vbf_jets_unselected_pt.pt, axis=1) == 2
mask_events_zero_selected_one_ak4unselected_one_ptunselected = (
    ak.count(vbf_jets_unselected_ak4.pt, axis=1) == 1
) & (ak.count(vbf_jets_unselected_pt.pt, axis=1) == 1)

In [None]:
# Statistics: count the events in each category
n_events = len(vbf_jets_selected_ak4pt)
n_events_two_selected = ak.sum(mask_events_two_selected)
n_events_one_selected_one_ak4unselected = ak.sum(mask_events_one_selected_one_ak4unselected)
n_events_one_selected_one_ptunselected = ak.sum(mask_events_one_selected_one_ptunselected)

# events in which at least one true VBF jet is among the selected jets
n_events_selected = (
    n_events_two_selected
    + n_events_one_selected_one_ak4unselected
    + n_events_one_selected_one_ptunselected
)

n_events_zero_selected = ak.sum(mask_events_zero_selected)
n_events_zero_selected_two_ak4unselected = ak.sum(mask_events_zero_selected_two_ak4unselected)
n_events_zero_selected_two_ptunselected = ak.sum(mask_events_zero_selected_two_ptunselected)
n_events_zero_selected_one_ak4unselected_one_ptunselected = ak.sum(
    mask_events_zero_selected_one_ak4unselected_one_ptunselected
)

# Sanity checks: the categories must partition the events
assert (
    n_events
    == n_events_two_selected
    + n_events_one_selected_one_ak4unselected
    + n_events_one_selected_one_ptunselected
    + n_events_zero_selected
), "Not all events are accounted for"

assert n_events_zero_selected == (
    n_events_zero_selected_two_ak4unselected
    + n_events_zero_selected_two_ptunselected
    + n_events_zero_selected_one_ak4unselected_one_ptunselected
), "Not all events with zero selected VBF jets are accounted for"

# Report every category as "<description>: <count> (<share of all events>)"
print(f"Number of events: {n_events}")
for description, n_category in (
    ("Number of selected events", n_events_selected),
    ("Number of events with two selected VBF jets", n_events_two_selected),
    (
        "Number of events with one selected VBF jet and one VBF jet unselected by AK4",
        n_events_one_selected_one_ak4unselected,
    ),
    (
        "Number of events with one selected VBF jet and one VBF jet unselected by pT",
        n_events_one_selected_one_ptunselected,
    ),
    ("Number of events with zero selected VBF jets", n_events_zero_selected),
    (
        "  - Number of events with zero selected VBF jets with two unselected by AK4",
        n_events_zero_selected_two_ak4unselected,
    ),
    (
        "  - Number of events with zero selected VBF jets with two unselected by pT",
        n_events_zero_selected_two_ptunselected,
    ),
    (
        "  - Number of events with zero selected VBF jets with one unselected by AK4 "
        "and one unselected by pT",
        n_events_zero_selected_one_ak4unselected_one_ptunselected,
    ),
):
    print(f"{description}: {n_category} ({n_category/n_events:.2%})")

## Comparison with old selection

In [None]:
# Same categorisation as for the new selection, driven by `old_vbf_jet_mask`.
# `[:, :2]` keeps the first two jets passing the AK4 mask (the "pT selection").
# selected VBF jets
old_vbf_jets_selected_ak4pt = jets[old_vbf_jet_mask][:, :2][matched[old_vbf_jet_mask][:, :2]]
# VBF jets that were not selected by AK4 selection
old_vbf_jets_unselected_ak4 = jets[~old_vbf_jet_mask][matched[~old_vbf_jet_mask]]
# VBF jets that were selected by AK4 selection but unselected by pT selection
old_vbf_jets_unselected_pt = jets[old_vbf_jet_mask][:, 2:][matched[old_vbf_jet_mask][:, 2:]]

# Generate masks for different categories of events
old_mask_events_two_selected = ak.count(old_vbf_jets_selected_ak4pt.pt, axis=1) == 2
old_mask_events_one_selected_one_ak4unselected = (
    ak.count(old_vbf_jets_selected_ak4pt.pt, axis=1) == 1
) & (ak.count(old_vbf_jets_unselected_ak4.pt, axis=1) == 1)
old_mask_events_one_selected_one_ptunselected = (
    ak.count(old_vbf_jets_selected_ak4pt.pt, axis=1) == 1
) & (ak.count(old_vbf_jets_unselected_pt.pt, axis=1) == 1)
old_mask_events_zero_selected = ak.count(old_vbf_jets_selected_ak4pt.pt, axis=1) == 0
# subcategories of old_mask_events_zero_selected
old_mask_events_zero_selected_two_ak4unselected = (
    ak.count(old_vbf_jets_unselected_ak4.pt, axis=1) == 2
)
old_mask_events_zero_selected_two_ptunselected = (
    ak.count(old_vbf_jets_unselected_pt.pt, axis=1) == 2
)
old_mask_events_zero_selected_one_ak4unselected_one_ptunselected = (
    ak.count(old_vbf_jets_unselected_ak4.pt, axis=1) == 1
) & (ak.count(old_vbf_jets_unselected_pt.pt, axis=1) == 1)

# Statistics: count the events in each category
old_n_events = len(old_vbf_jets_selected_ak4pt)
old_n_events_two_selected = ak.sum(old_mask_events_two_selected)
old_n_events_one_selected_one_ak4unselected = ak.sum(old_mask_events_one_selected_one_ak4unselected)
old_n_events_one_selected_one_ptunselected = ak.sum(old_mask_events_one_selected_one_ptunselected)
# events in which at least one true VBF jet is among the selected jets
old_n_events_selected = (
    old_n_events_two_selected
    + old_n_events_one_selected_one_ak4unselected
    + old_n_events_one_selected_one_ptunselected
)

old_n_events_zero_selected = ak.sum(old_mask_events_zero_selected)
old_n_events_zero_selected_two_ak4unselected = ak.sum(
    old_mask_events_zero_selected_two_ak4unselected
)
old_n_events_zero_selected_two_ptunselected = ak.sum(old_mask_events_zero_selected_two_ptunselected)
old_n_events_zero_selected_one_ak4unselected_one_ptunselected = ak.sum(
    old_mask_events_zero_selected_one_ak4unselected_one_ptunselected
)

# Sanity checks: the categories must partition the events
assert (
    old_n_events
    == old_n_events_two_selected
    + old_n_events_one_selected_one_ak4unselected
    + old_n_events_one_selected_one_ptunselected
    + old_n_events_zero_selected
), "Not all events are accounted for"

assert old_n_events_zero_selected == (
    old_n_events_zero_selected_two_ak4unselected
    + old_n_events_zero_selected_two_ptunselected
    + old_n_events_zero_selected_one_ak4unselected_one_ptunselected
), "Not all events with zero selected VBF jets are accounted for"

# Report every category as "<description>: <count> (<share of all events>)"
print(f"Number of events: {old_n_events}")
for description, n_category in (
    ("Number of selected events", old_n_events_selected),
    ("Number of events with two selected VBF jets", old_n_events_two_selected),
    (
        "Number of events with one selected VBF jet and one VBF jet unselected by AK4",
        old_n_events_one_selected_one_ak4unselected,
    ),
    (
        "Number of events with one selected VBF jet and one VBF jet unselected by pT",
        old_n_events_one_selected_one_ptunselected,
    ),
    ("Number of events with zero selected VBF jets", old_n_events_zero_selected),
    (
        "  - Number of events with zero selected VBF jets with two unselected by AK4",
        old_n_events_zero_selected_two_ak4unselected,
    ),
    (
        "  - Number of events with zero selected VBF jets with two unselected by pT",
        old_n_events_zero_selected_two_ptunselected,
    ),
    (
        "  - Number of events with zero selected VBF jets with one unselected by AK4 "
        "and one unselected by pT",
        old_n_events_zero_selected_one_ak4unselected_one_ptunselected,
    ),
):
    print(f"{description}: {n_category} ({n_category/old_n_events:.2%})")

# Selection Kinematics

## Jet-level features: jet mass, $p_\mathrm{T}$, and $\eta$

In [None]:
# Jet mass per jet category, once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Jets", "Jets (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 40, 1)
    # one step histogram per category, drawn in a fixed order
    for category_jets, label in (
        (vbf_jets_selected_ak4pt, "True VBF Jets Selected"),
        (vbf_jets_unselected_ak4, "True VBF Jets Unselected by AK4"),
        # raw string: "\m" is not a valid Python escape sequence
        (vbf_jets_unselected_pt, r"True VBF Jets Unselected by $p_\mathrm{T}$"),
        (fake_jets_selected, "Fake VBF Jets Selected"),
    ):
        plt.hist(
            ak.flatten(category_jets.mass),
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )

    plt.legend(loc="upper right")
    plt.xlabel("$m_j$ (GeV)")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_jet_mass{name_label}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# Jet pT per jet category, once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Jets", "Jets (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 300, 10)
    # one step histogram per category, drawn in a fixed order
    for category_jets, label in (
        (vbf_jets_selected_ak4pt, "True VBF Jets Selected"),
        (vbf_jets_unselected_ak4, "True VBF Jets Unselected by AK4"),
        # raw strings: "\m" is not a valid Python escape sequence
        (vbf_jets_unselected_pt, r"True VBF Jets Unselected by $p_\mathrm{T}$"),
        (fake_jets_selected, "Fake VBF Jets Selected"),
    ):
        plt.hist(
            ak.flatten(category_jets.pt),
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )

    plt.legend(loc="upper right")
    plt.xlabel(r"${p_\mathrm{T}}_j$ (GeV)")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_jet_pt{name_label}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# Jet |eta| per jet category, once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Jets", "Jets (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 6, 0.5)
    # one step histogram per category, drawn in a fixed order
    for category_jets, label in (
        (vbf_jets_selected_ak4pt, "True VBF Jets Selected"),
        (vbf_jets_unselected_ak4, "True VBF Jets Unselected by AK4"),
        # raw strings: "\m" and "\e" are not valid Python escape sequences
        (vbf_jets_unselected_pt, r"True VBF Jets Unselected by $p_\mathrm{T}$"),
        (fake_jets_selected, "Fake VBF Jets Selected"),
    ):
        plt.hist(
            np.abs(ak.flatten(category_jets.eta)),
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )

    plt.legend(loc="upper right")
    plt.xlabel(r"$|\eta_j|$")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_jet_eta{name_label}.pdf", bbox_inches="tight")
    plt.show()

## Event-level features: $\eta_{jj}$, $\Delta R(j, \mathrm{Hbb})$, $\Delta R(j, \mathrm{HVV})$

In [None]:
# First two jets passing the AK4 selection in each event, padded with None so that
# every event has exactly two entries; then split by event category.
jets_ak4pt_selected_padded = ak.pad_none(jets_ak4_selected[:, :2], 2, axis=1)
jets_ak4pt_selected_two_vbf_selected = jets_ak4pt_selected_padded[mask_events_two_selected]
jets_ak4pt_selected_one_ak4_unselected = jets_ak4pt_selected_padded[
    mask_events_one_selected_one_ak4unselected
]
jets_ak4pt_selected_one_pt_unselected = jets_ak4pt_selected_padded[
    mask_events_one_selected_one_ptunselected
]
jets_ak4pt_selected_zero_vbf_selected = jets_ak4pt_selected_padded[mask_events_zero_selected]

In [None]:
# eta_jj: |eta_0 - eta_1| of the two selected jets, per event category
etas_two_selected = jets_ak4pt_selected_two_vbf_selected.eta
etas_one_selected_one_ak4unselected = jets_ak4pt_selected_one_ak4_unselected.eta
etas_one_selected_one_ptunselected = jets_ak4pt_selected_one_pt_unselected.eta
etas_zero_selected = jets_ak4pt_selected_zero_vbf_selected.eta

eta_jj_two_selected = np.abs(etas_two_selected[:, 0] - etas_two_selected[:, 1])
eta_jj_one_selected_one_ak4unselected = np.abs(
    etas_one_selected_one_ak4unselected[:, 0] - etas_one_selected_one_ak4unselected[:, 1]
)
eta_jj_one_selected_one_ptunselected = np.abs(
    etas_one_selected_one_ptunselected[:, 0] - etas_one_selected_one_ptunselected[:, 1]
)
eta_jj_zero_selected = np.abs(etas_zero_selected[:, 0] - etas_zero_selected[:, 1])

# once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Events", "Events (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 10, 0.5)
    # one step histogram per event category, drawn in a fixed order
    for eta_jj_category, label in (
        (eta_jj_two_selected, "Two VBF Jets Selected"),
        (eta_jj_one_selected_one_ak4unselected, "One VBF Jet Unselected by AK4"),
        # raw strings: "\m" and "\e" are not valid Python escape sequences
        (eta_jj_one_selected_one_ptunselected, r"One VBF Jet Unselected by $p_\mathrm{T}$"),
        (eta_jj_zero_selected, "Two VBF Jets Unselected"),
    ):
        plt.hist(
            eta_jj_category,
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )

    plt.legend(loc="upper right")
    plt.xlabel(r"$\eta_{jj}$")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_event_etajj{name_label}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# dR(j, Hbb) of the selected jets, per event category;
# once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Events", "Events (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 8, 0.5)
    # each category pairs its selected jets with the gen-level H->bb of the same events
    for category_jets, category_mask, label in (
        (jets_ak4pt_selected_two_vbf_selected, mask_events_two_selected, "Two VBF Jets Selected"),
        (
            jets_ak4pt_selected_one_ak4_unselected,
            mask_events_one_selected_one_ak4unselected,
            "One VBF Jet Unselected by AK4",
        ),
        (
            jets_ak4pt_selected_one_pt_unselected,
            mask_events_one_selected_one_ptunselected,
            # raw strings: "\m" and "\D" are not valid Python escape sequences
            r"One VBF Jet Unselected by $p_\mathrm{T}$",
        ),
        (
            jets_ak4pt_selected_zero_vbf_selected,
            mask_events_zero_selected,
            "Two VBF Jets Unselected",
        ),
    ):
        plt.hist(
            ak.flatten(category_jets.delta_r(Hbb[category_mask])),
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )
    plt.legend(loc="upper right")
    plt.xlabel(r"$\Delta R(j, Hbb)$")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_event_dr_hbb{name_label}.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# dR(j, HVV) of the selected jets, per event category;
# once as raw counts and once normalised to unit area
for ylabel, name_label, density in zip(
    ("Events", "Events (A.U)"),
    ("", "_density"),
    (False, True),
):
    plt.figure(figsize=(10, 10))
    bins = np.arange(0, 8, 0.5)
    # each category pairs its selected jets with the gen-level H->VV of the same events
    for category_jets, category_mask, label in (
        (jets_ak4pt_selected_two_vbf_selected, mask_events_two_selected, "Two VBF Jets Selected"),
        (
            jets_ak4pt_selected_one_ak4_unselected,
            mask_events_one_selected_one_ak4unselected,
            "One VBF Jet Unselected by AK4",
        ),
        (
            jets_ak4pt_selected_one_pt_unselected,
            mask_events_one_selected_one_ptunselected,
            # raw strings: "\m" and "\D" are not valid Python escape sequences
            r"One VBF Jet Unselected by $p_\mathrm{T}$",
        ),
        (
            jets_ak4pt_selected_zero_vbf_selected,
            mask_events_zero_selected,
            "Two VBF Jets Unselected",
        ),
    ):
        plt.hist(
            ak.flatten(category_jets.delta_r(HVV[category_mask])),
            bins=bins,
            histtype="step",
            density=density,
            label=label,
        )
    plt.legend(loc="upper right")
    plt.xlabel(r"$\Delta R(j, HVV)$")
    plt.ylabel(ylabel)
    plt.savefig(plot_dir / f"selection_event_dr_hVV{name_label}.pdf", bbox_inches="tight")
    plt.show()

## Category-Wise Study

### Events in which both VBF jets are selected

In [None]:
# Restrict the per-event arrays to the events in which both VBF jets are selected.
mask_category = mask_events_two_selected
events_category, vbf_mask_category, matched_category = (
    per_event[mask_category] for per_event in (jets, vbf_jet_mask, matched)
)
# First two jets per event among those passing the VBF jet mask.
ak4_selected_jets = events_category[vbf_mask_category][:, :2]

# Sanity check that both selected jets are true VBF jets
matched_two_selected = matched_category
true_selected = ak4_selected_jets[matched_two_selected[vbf_mask_category][:, :2]]
n_true_selected = ak.count(true_selected.pt, axis=1)
assert ak.all(n_true_selected == 2), "Some selected jets are fake VBF jets"

In [None]:
# dR between two selected VBF jets
# Length-1 slices (rather than integer indices) keep the per-event list structure,
# so the result is flattened before being histogrammed.
jet1 = true_selected[:, :1]
jet2 = true_selected[:, 1:]
dR = jet1.delta_r(jet2)
plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(dR),
    bins=np.arange(0, 10.5, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\D" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\Delta R(\mathrm{True Selected}, \mathrm{True Selected})$")
plt.ylabel("Events")
plt.title("Events with Two Selected VBF Jets")
plt.savefig(
    plot_dir / "selection_two_selected_dr_trueSelected_trueSelected.pdf", bbox_inches="tight"
)
plt.show()

In [None]:
# eta_jj between two selected VBF jets: |eta_1 - eta_2| of the two true selected jets
eta_jj = np.abs(true_selected[:, 0].eta - true_selected[:, 1].eta)
plt.figure(figsize=(10, 10))
plt.hist(
    eta_jj,
    bins=np.arange(0, 10.5, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\e" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\eta_{\mathrm{TrueSelected}, \mathrm{TrueSelected}}$")
plt.ylabel("Events")
plt.title("Events with Two Selected VBF Jets")
plt.savefig(
    plot_dir / "selection_two_selected_eta_jj_trueSelected_trueSelected.pdf", bbox_inches="tight"
)
plt.show()

### dR's of events in which one VBF jet is selected and the other is unselected by AK4

In [None]:
# Restrict jets, truth-matching flags and the VBF jet mask to the events with one
# selected VBF jet and one VBF jet unselected by AK4.
mask_category = mask_events_one_selected_one_ak4unselected
events_category, matched_events_category, vbf_jet_mask_category = (
    per_event[mask_category] for per_event in (jets, matched, vbf_jet_mask)
)

In [None]:
# Sum over the category mask, displayed as the cell output
# (the number of events in this category, assuming a boolean per-event mask).
ak.sum(mask_events_one_selected_one_ak4unselected)

In [None]:
# Split the jets of each event by the VBF jet mask, carrying the truth-matching flags along.
ak4_pass_jets = events_category[vbf_jet_mask_category]
ak4_pass_matched = matched_events_category[vbf_jet_mask_category]
ak4_fail_jets = events_category[~vbf_jet_mask_category]
ak4_fail_matched = matched_events_category[~vbf_jet_mask_category]

# Only the first two jets passing the mask are the selected ones.
first_two_jets = ak4_pass_jets[:, :2]
first_two_matched = ak4_pass_matched[:, :2]

true_selected = first_two_jets[first_two_matched]
fake_selected = first_two_jets[~first_two_matched]
true_unselected = ak4_fail_jets[ak4_fail_matched]

# Sanity check that exactly one selected jet is true VBF jet
n_true_selected = ak.count(true_selected.pt, axis=1)
assert ak.all(n_true_selected == 1), "Not exactly one selected jet is true VBF jet"

# Sanity check that exactly one unselected jet is true VBF jet
n_true_unselected = ak.count(true_unselected.pt, axis=1)
assert ak.all(n_true_unselected == 1), "Not exactly one unselected jet is true VBF jet"

In [None]:
# dR between true selected and true unselected VBF jets
# Length-1 slices keep the per-event list structure; the result is flattened for the histogram.
jet1 = true_selected[:, :1]
jet2 = true_unselected[:, :1]
dR = jet1.delta_r(jet2)

plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(dR),
    bins=np.arange(0, 12, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\D" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\Delta R(\mathrm{True Selected}, \mathrm{True Unselected By AK4})$")
plt.ylabel("Events")
plt.title("Events with One Selected and\nOne Unselected VBF Jet by AK4")
plt.savefig(
    plot_dir / "selection_one_selected_one_ak4_unselected_dr_trueSelected_trueUnselectedByAK4.pdf",
    bbox_inches="tight",
)
# Explicit show, consistent with the other plotting cells.
plt.show()

In [None]:
# eta_jj between true selected and true unselected VBF jets
eta_jj = np.abs(true_selected[:, 0].eta - true_unselected[:, 0].eta)
plt.figure(figsize=(10, 10))
plt.hist(
    eta_jj,
    bins=np.arange(0, 11, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\e" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\eta_{\mathrm{TrueSelected}, \mathrm{TrueUnselectedByAK4}}$")
plt.ylabel("Events")
plt.title("Events with One Selected and\nOne Unselected VBF Jet by AK4")
# Filename spelling fixed ("trueUnelectedByAK4" -> "trueUnselectedByAK4") to match the
# corresponding dR plot of this category.
plt.savefig(
    plot_dir
    / "selection_one_selected_one_ak4_unselected_eta_jj_trueSelected_trueUnselectedByAK4.pdf",
    bbox_inches="tight",
)
plt.show()

In [None]:
# dR between true selected and fake selected VBF jets
jet1 = true_selected[:, :1]
jet2 = fake_selected[:, :1]
# Keep only the events that actually contain a fake selected jet, i.e. drop the events
# in which only one jet passes the AK4 selection. The mask is built once, from the
# unfiltered jet2, and applied to both collections so they stay aligned.
has_fake_selected = ak.count(jet2.pt, axis=1) == 1
jet1 = jet1[has_fake_selected]
jet2 = jet2[has_fake_selected]

dR = jet1.delta_r(jet2)

plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(dR),
    bins=np.arange(0, 10, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\D" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\Delta R(\mathrm{True Selected}, \mathrm{Fake Selected By AK4})$")
plt.ylabel("Events")
plt.title("Events with One Selected and\nOne Fake Selected VBF Jet")
plt.savefig(
    plot_dir / "selection_one_selected_one_fake_selected_dr_trueSelected_fakeSelectedByAK4.pdf",
    bbox_inches="tight",
)
# Explicit show, consistent with the other plotting cells.
plt.show()

In [None]:
# eta_jj between true selected and fake selected VBF jets
# NOTE: jet1 and jet2 are the filtered, length-1-per-event collections defined in the
# preceding dR cell, so that cell must be run first.
eta_jj = ak.flatten(np.abs(jet1.eta - jet2.eta))
plt.figure(figsize=(10, 10))
plt.hist(
    eta_jj,
    bins=np.arange(0, 11, 0.5),
    histtype="step",
    density=False,
)
# Raw string: "\e" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\eta_{\mathrm{True Selected}, \mathrm{Fake Selected By AK4}}$")
plt.ylabel("Events")
plt.title("Events with One Selected and\nOne Unselected VBF Jet by AK4")
plt.savefig(
    plot_dir
    / "selection_one_selected_one_ak4_unselected_eta_jj_trueSelected_fakeSelectedByAK4.pdf",
    bbox_inches="tight",
)
plt.show()

### dR's of events with one selected and one unselected by pT

In [None]:
# Restrict jets, truth-matching flags and the VBF jet mask to the events with one
# selected VBF jet and one VBF jet unselected by pT.
mask_category = mask_events_one_selected_one_ptunselected
events_category, matched_events_category, vbf_jet_mask_category = (
    per_event[mask_category] for per_event in (jets, matched, vbf_jet_mask)
)

In [None]:
# Jets passing the VBF jet mask, with their truth-matching flags.
ak4_pass_jets = events_category[vbf_jet_mask_category]
ak4_pass_matched = matched_events_category[vbf_jet_mask_category]

# The first two passing jets are the selected ones; any further passing jets are
# the ones left unselected by the pT selection.
first_two_jets, remaining_jets = ak4_pass_jets[:, :2], ak4_pass_jets[:, 2:]
first_two_matched, remaining_matched = ak4_pass_matched[:, :2], ak4_pass_matched[:, 2:]

true_selected = first_two_jets[first_two_matched]
fake_selected = first_two_jets[~first_two_matched]
true_unselected = remaining_jets[remaining_matched]

# Sanity check that exactly one selected jet is true VBF jet
n_true_selected = ak.count(true_selected.pt, axis=1)
assert ak.all(n_true_selected == 1), "Not exactly one selected jet is true VBF jet"

# Sanity check that exactly one unselected jet is true VBF jet
n_true_unselected = ak.count(true_unselected.pt, axis=1)
assert ak.all(n_true_unselected == 1), "Not exactly one unselected jet is true VBF jet"

# Sanity check that exactly one fake jet is selected
# This is expected: in this category a true VBF jet is filtered out by the pT selection,
# so one of the two selected slots is taken by a fake jet.
n_fake_selected = ak.count(fake_selected.pt, axis=1)
assert ak.all(n_fake_selected == 1), "Not exactly one fake jet is selected"

In [None]:
# dR between true selected and true unselected VBF jets
# Length-1 slices keep the per-event list structure; the result is flattened for the histogram.
jet1 = true_selected[:, :1]
jet2 = true_unselected[:, :1]
dR = jet1.delta_r(jet2)

plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(dR),
    bins=np.arange(0, 12, 0.5),
    histtype="step",
    density=False,
)
# Raw strings below: "\D" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\Delta R(\mathrm{True Selected}, \mathrm{True Unselected By P_\mathrm{T}})$")
plt.ylabel("Events")
plt.title(
    # The first piece is a normal string so that "\n" stays a real line break.
    "Events with One Selected and\n"
    r"One Unselected VBF Jet by $p_\mathrm{T}$"
)
plt.savefig(
    plot_dir / "selection_one_selected_one_pt_unselected_dr_trueSelected_trueUnselectedByPt.pdf",
    # Same tight bounding box as every other saved figure in this study.
    bbox_inches="tight",
)
# Explicit show, consistent with the other plotting cells.
plt.show()

In [None]:
# eta_jj between true selected and true unselected VBF jets
eta_jj = np.abs(true_selected[:, 0].eta - true_unselected[:, 0].eta)
plt.figure(figsize=(10, 10))
plt.hist(
    eta_jj,
    bins=np.arange(0, 11, 0.5),
    histtype="step",
    density=False,
)
# Raw strings below: "\e" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\eta_{\mathrm{True Selected}, \mathrm{True Unselected By P_\mathrm{T}}}$")
plt.ylabel("Events")
plt.title(
    # The first piece is a normal string so that "\n" stays a real line break.
    "Events with One Selected and\n"
    r"One Unselected VBF Jet by $p_\mathrm{T}$"
)
# Filename spelling fixed ("trueUnelectedByPt" -> "trueUnselectedByPt") to match the
# corresponding dR plot of this category.
plt.savefig(
    plot_dir / "selection_one_selected_one_pt_unselected_eta_jj_trueSelected_trueUnselectedByPt.pdf",
    bbox_inches="tight",
)
plt.show()

In [None]:
# dR between true selected and fake selected VBF jets
# Length-1 slices keep the per-event list structure; the result is flattened for the histogram.
dR = true_selected[:, :1].delta_r(fake_selected[:, :1])

plt.figure(figsize=(10, 10))
plt.hist(
    ak.flatten(dR),
    bins=np.arange(0, 12, 0.5),
    histtype="step",
    density=False,
)
# Raw strings below: "\D" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\Delta R(\mathrm{True Selected}, \mathrm{Fake By P_\mathrm{T}})$")
plt.ylabel("Events")
plt.title(
    # The first piece is a normal string so that "\n" stays a real line break.
    "Events with One Selected and\n"
    r"One Unselected VBF Jet by $p_\mathrm{T}$"
)
plt.savefig(
    plot_dir / "selection_one_selected_one_pt_unselected_dr_trueSelected_fakeSelectedByPt.pdf",
    # Same tight bounding box as every other saved figure in this study.
    bbox_inches="tight",
)
# Explicit show, consistent with the other plotting cells.
plt.show()

In [None]:
# eta_jj between true selected and fake selected VBF jets
eta_jj = np.abs(true_selected[:, 0].eta - fake_selected[:, 0].eta)
plt.figure(figsize=(10, 10))
plt.hist(
    eta_jj,
    bins=np.arange(0, 11, 0.5),
    histtype="step",
    density=False,
)
# Raw strings below: "\e" and "\m" are not valid Python escape sequences.
plt.xlabel(r"$\eta_{\mathrm{True Selected}, \mathrm{Fake Selected By P_\mathrm{T}}}$")
plt.ylabel("Events")
plt.title(
    # The first piece is a normal string so that "\n" stays a real line break.
    "Events with One Selected and\n"
    r"One Unselected VBF Jet by $p_\mathrm{T}$"
)
plt.savefig(
    plot_dir / "selection_one_selected_one_pt_unselected_eta_jj_trueSelected_fakeSelectedByPt.pdf",
    bbox_inches="tight",
)
plt.show()

# Further Analysis of $p_\mathrm{T}$ Selection
We want to study the $p_\mathrm{T}$ ranks, among the jets passing the AK4 selection, of the jets that are wrongly selected and of the true VBF jets that are missed by the $p_\mathrm{T}$ selection.

In [None]:
# Re-select the "one selected, one unselected by pT" category so that this section
# does not depend on which category cell was run last.
mask_category = mask_events_one_selected_one_ptunselected
events_category, matched_events_category, vbf_jet_mask_category = (
    per_event[mask_category] for per_event in (jets, matched, vbf_jet_mask)
)

In [None]:
# Rank of every jet among the jets of its event that pass the VBF jet mask, split by
# truth matching and by whether the jet falls in the first two (selected) positions.
# Vectorized replacement for a per-event, per-jet Python loop; it also works when the
# category contains no events.
# NOTE(review): position is used as the pT rank, which presumably relies on the jets
# being stored in descending pT order — confirm.

# Truth-matching flags of the mask-passing jets, in their original order.
ak4_pass_matched = matched_events_category[vbf_jet_mask_category]
# 0-based position of each passing jet within its event.
ak4_pass_rank = ak.local_index(ak4_pass_matched, axis=1)
# Only the first two passing jets survive the pT selection.
in_first_two = ak4_pass_rank < 2

# True VBF jets that are selected.
true_selected_lists = ak.flatten(ak4_pass_rank[ak4_pass_matched & in_first_two])
# True VBF jets that pass the mask but sit at rank >= 2, so they are dropped.
true_unselected_by_pt_lists = ak.flatten(ak4_pass_rank[ak4_pass_matched & ~in_first_two])
# Unmatched (fake) jets that occupy one of the two selected slots.
false_selected_lists = ak.flatten(ak4_pass_rank[~ak4_pass_matched & in_first_two])

In [None]:
# Rank distributions of the true selected, true unselected-by-pT and false selected jets,
# drawn once with raw counts and once with density=True.
rank_bins = np.arange(1, 9, 1)
for density, ylabel, plot_label in zip(
    (False, True),
    ("Jets", "Jets (A.U.)"),
    ("", "_density"),
):
    plt.figure(figsize=(10, 10))
    # The order of the entries fixes the order (and hence colour) of the histograms.
    for rank_values, rank_label in (
        (true_selected_lists, "True Selected"),
        (true_unselected_by_pt_lists, "True Unselected by $p_T$"),
        (false_selected_lists, "False Selected"),
    ):
        plt.hist(
            rank_values + 1,  # ranks are stored 0-based; the axis counts from 1
            bins=rank_bins,
            histtype="step",
            density=density,
            label=rank_label,
        )
    plt.xlabel("Rank by $p_T$ in VBF-Tagged Jets (Counting from 1)")
    plt.ylabel(ylabel)
    plt.legend(loc="upper right")
    plt.title(
        # The first piece is a normal string so that "\n" stays a real line break;
        # the second is raw because "\m" is not a valid Python escape sequence.
        "Events with One Selected and\n"
        r"One Unselected VBF Jet by $p_\mathrm{T}$"
    )
    plt.savefig(
        plot_dir / f"selection_pt_rank{plot_label}.pdf",
        bbox_inches="tight",
    )
    plt.show()