Studying efficiency / mistag rate of a veto of the semi-resolved X-HY channel

In [None]:
import uproot
import awkward as ak
from coffea import nanoevents
from coffea.nanoevents.methods.base import NanoEventsArray
from coffea.analysis_tools import Weights, PackedSelection
from coffea.nanoevents.methods import nanoaod
from coffea.nanoevents.methods import vector
from coffea.lookup_tools.dense_lookup import dense_lookup

ak.behavior.update(vector.behavior)

import pickle, json, gzip
import numpy as np

from typing import Optional, List, Dict
from copy import copy

import matplotlib.pyplot as plt
import mplhep as hep
from matplotlib import colors

from utils import pad_val

In [None]:
# Signal sample used throughout this notebook: the path names an NMSSM X -> Y H -> 2W 2B -> 4Q 2B
# point with MX = 3000, MY = 250 from the 2017 MC production, read via its root:// URL.
signal_file = "root://cmseos.fnal.gov///store/user/lpcpfnano/ammitra/v2_3/2017/XHY/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250_TuneCP5_13TeV-madgraph-pythia8/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250/230323_182603/0000/nano_mc2017_1-1.root"

# Build the event array with the NanoAOD schema so collections such as FatJet / GenPart are available.
event_factory = nanoevents.NanoEventsFactory.from_root(
    signal_file,
    schemaclass=nanoevents.NanoAODSchema,
)
events = event_factory.events()

In [None]:
# Per-event FatJet collection and the two tagger discriminants used by the veto.
fatjets = events.FatJet

# Both discriminants have the form signal / (signal + QCD), built from the particleNetMD_* outputs.
pnet_qcd = fatjets.particleNetMD_QCD
pnet_xbb = fatjets.particleNetMD_Xbb
txbb = pnet_xbb / (pnet_qcd + pnet_xbb)

# For the W-like score the "signal" part is the sum of the Xqq and Xcc outputs.
pnet_wqq = fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc
Wqq_score = pnet_wqq / (pnet_wqq + pnet_qcd)

In [None]:
# Veto definition, evaluated on the first three fatjets of each event:
#   1. at least two of them have a Wqq score >= 0.8, and
#   2. the one with the lowest Wqq score has Txbb >= 0.98.
wtg2 = ak.sum(Wqq_score[:, :3] >= 0.8, axis=1) >= 2

# Pad and argsort once, then reuse the result, so the full ordering and the "lowest" index are
# guaranteed to come from the same sort.
# NOTE(review): pad_val presumably pads/clips axis 1 to 3 entries with fill value 0 -- confirm in utils.
wqq_padded = pad_val(Wqq_score, 3, 0, 1)
sorted_wqq_score = np.argsort(wqq_padded, axis=1)  # ascending: column 0 is the lowest score
lowest_wqq_index = sorted_wqq_score[:, 0]

# Txbb of the lowest-Wqq fatjet, picked per event with (row, column) integer indexing.
ltxbb = pad_val(txbb, 3, 0, 1)[np.arange(len(fatjets)), lowest_wqq_index]
passveto = wtg2 & (ltxbb >= 0.98)

In [None]:
# Fraction of events in the loaded sample that pass the veto (mean of the boolean mask).
np.mean(passveto)

In [None]:
# Restrict to events passing the veto and pick one fatjet per event by its rank in Wqq score.
# sorted_wqq_score is ascending, so column 2 is the highest score and column 0 the lowest.
veto_fatjets = fatjets[passveto]
veto_order = sorted_wqq_score[passveto]
veto_rows = np.arange(len(veto_fatjets))

fj1 = veto_fatjets[veto_rows, veto_order[:, 2]]  # highest Wqq score
fj2 = veto_fatjets[veto_rows, veto_order[:, 1]]  # second-highest Wqq score
fj3 = veto_fatjets[veto_rows, veto_order[:, 0]]  # lowest Wqq score (the Txbb-tested fatjet)

In [None]:
# Quick look at the pT of the two highest-Wqq fatjets side by side (stacked, then transposed so
# that fj1 and fj2 end up as columns).
np.stack((fj1.pt, fj2.pt)).to_numpy().T

In [None]:
# Angular separation between the two highest-Wqq fatjets in vetoed events.
dr_wtagged = fj1.delta_r(fj2)
dr_bin_edges = np.arange(0, 4, 0.2)

plt.hist(dr_wtagged, dr_bin_edges, histtype="step")
plt.xlabel("dR between W-tagged fatjets")
plt.show()

In [None]:
# Invariant mass of the system formed by the two highest-Wqq fatjets.
plt.figure(figsize=(8, 8))
wtagged_pair = fj1 + fj2
plt.hist(wtagged_pair.mass, bins=np.arange(0, 4000, 400), histtype="step")

In [None]:
# Particle ID codes (PDG Monte Carlo numbering scheme) compared against GenPart.pdgId.
# Signs are not encoded here; the gen selections in this notebook compare abs(pdgId) to these values.

# Quarks and the gluon.
d_PDGID = 1
u_PDGID = 2
s_PDGID = 3
c_PDGID = 4
b_PDGID = 5
g_PDGID = 21
TOP_PDGID = 6

# Charged leptons and their neutrinos.
ELE_PDGID = 11
vELE_PDGID = 12
MU_PDGID = 13
vMU_PDGID = 14
TAU_PDGID = 15
vTAU_PDGID = 16

# Bosons. G_PDGID (22) is the photon code; g_PDGID (21) above is the gluon.
G_PDGID = 22
Z_PDGID = 23
W_PDGID = 24
HIGGS_PDGID = 25
# Code used to select the gen Y in this notebook.
# NOTE(review): 35 is the heavy neutral Higgs slot in the PDG scheme -- confirm it is what this sample uses for Y.
Y_PDGID = 35

# B-hadron codes. NOTE(review): presumably B0 (511), B+ (521) and B*+ (523) -- confirm against the PDG table.
b_PDGIDS = [511, 521, 523]

# Graviton code.
GRAV_PDGID = 39

# Status flags passed to GenPart.hasFlags when selecting gen particles.
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]

In [None]:
# Gen-level selection. Boolean masks are combined with the logical operators `&` / `|` rather than
# arithmetic `*` / `+`, which only act as AND / OR when both operands are boolean.
gen_parts = events.GenPart
passes_gen_flags = gen_parts.hasFlags(GEN_FLAGS)

# gen Higgs and kids
higgs = gen_parts[(abs(gen_parts.pdgId) == HIGGS_PDGID) & passes_gen_flags]
is_bb = abs(higgs.children.pdgId) == b_PDGID
# True for events in which exactly two Higgs children are b quarks.
has_bb = ak.sum(ak.flatten(is_bb, axis=2), axis=1) == 2

# b-quark children of the selected Higgs, with the per-Higgs nesting level removed.
bb = ak.flatten(higgs.children[is_bb], axis=2)

# gen Y and kids
Ys = gen_parts[(abs(gen_parts.pdgId) == Y_PDGID) & passes_gen_flags]
is_VV = (abs(Ys.children.pdgId) == W_PDGID) | (abs(Ys.children.pdgId) == Z_PDGID)
# True for events in which exactly two Y children are W or Z bosons.
has_VV = ak.sum(ak.flatten(is_VV, axis=2), axis=1) == 2

# W/Z children of the selected Y, with the per-Y nesting level removed.
VV = ak.flatten(Ys.children[is_VV], axis=2)

In [None]:
# Angular separation between the first two vector-boson children of the gen Y, in vetoed events.
veto_VV = VV[passveto]
gen_v1 = veto_VV[:, 0]
gen_v2 = veto_VV[:, 1]

plt.hist(gen_v1.delta_r(gen_v2), np.arange(0, 4, 0.2), histtype="step")
plt.xlabel("dR between gen Ws")
plt.show()

In [None]:
# Distance from the gen Y to whichever of the two highest-Wqq fatjets is closer to it.
veto_Ys = Ys[passveto]
dr_fj1_Y = fj1.delta_r(veto_Ys)
dr_fj2_Y = fj2.delta_r(veto_Ys)
dr_closer = np.min((dr_fj1_Y, dr_fj2_Y), axis=0)

plt.hist(dr_closer, np.arange(0, 4, 0.2), histtype="step")
plt.xlabel("dR between closer w-tagged fatjet and gen Y")
plt.show()

In [None]:
# Distance from the gen Y to whichever of the two highest-Wqq fatjets is farther from it.
veto_Ys = Ys[passveto]
dr_fj1_Y = fj1.delta_r(veto_Ys)
dr_fj2_Y = fj2.delta_r(veto_Ys)
dr_farther = np.max((dr_fj1_Y, dr_fj2_Y), axis=0)

plt.hist(dr_farther, np.arange(0, 4, 0.2), histtype="step")
plt.xlabel("dR between farther w-tagged fatjet and gen Y")
plt.show()

In [None]:
# Distance between the lowest-Wqq fatjet (the one required to pass the Txbb cut) and the gen Higgs.
veto_higgs = higgs[passveto]
dr_fj3_higgs = fj3.delta_r(veto_higgs)

plt.hist(dr_fj3_higgs, np.arange(0, 4, 0.2), histtype="step")
plt.xlabel("dR between bb-tagged fatjet and gen Higgs")
plt.show()

In [None]:
# Wqq score of the two highest-Wqq fatjets, split by which of the pair is closer in dR to the gen Y.
fj1_wqq = fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc
fj1ws = fj1_wqq / (fj1_wqq + fj1.particleNetMD_QCD)
fj2_wqq = fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc
fj2ws = fj2_wqq / (fj2_wqq + fj2.particleNetMD_QCD)

# One boolean per vetoed event: True when fj1 is at least as close to the gen Y as fj2.
veto_Ys = Ys[passveto]
fj1closer = ak.flatten(fj1.delta_r(veto_Ys) <= fj2.delta_r(veto_Ys))

# First panel: the closer fatjet of each pair; second panel: the other one.
# NOTE(review): the split is on distance to the gen Y (`Ys`), while the titles say "Higgs-matched" --
# confirm the titles are intended.
for panel_title, pick_fj1 in (
    ("Higgs-matched w-tagged fatjet", fj1closer),
    ("Non-Higgs-matched w-tagged fatjet", ~fj1closer),
):
    plt.title(panel_title)
    plt.hist(
        np.concatenate((fj1ws[pick_fj1], fj2ws[~pick_fj1])),
        np.arange(0.8, 1, 0.02),
        histtype="step",
    )
    plt.xlabel("Wqq Score")
    plt.show()

In [None]:
# Soft-drop mass of the two highest-Wqq fatjets, split by which of the pair is closer in dR to the
# gen Y. These plots do not use the Wqq scores, so they are not recomputed in this cell.

# One boolean per vetoed event: True when fj1 is at least as close to the gen Y as fj2.
veto_Ys = Ys[passveto]
fj1closer = ak.flatten(fj1.delta_r(veto_Ys) <= fj2.delta_r(veto_Ys))

# NOTE(review): the split is on distance to the gen Y (`Ys`), while the titles say "Higgs-matched" --
# confirm the titles are intended.
plt.title("Higgs-matched w-tagged fatjet")
plt.hist(np.concatenate((fj1.msoftdrop[fj1closer], fj2.msoftdrop[~fj1closer])), histtype="step")
plt.xlabel("FatJet SD Mass")
plt.show()

plt.title("Non-Higgs-matched w-tagged fatjet")
plt.hist(np.concatenate((fj1.msoftdrop[~fj1closer], fj2.msoftdrop[fj1closer])), histtype="step")
plt.xlabel("FatJet SD Mass")
plt.show()

In [None]:
# pT of the two highest-Wqq fatjets, split by which of the pair is closer in dR to the gen Y.
# These plots do not use the Wqq scores, so they are not recomputed in this cell.

# One boolean per vetoed event: True when fj1 is at least as close to the gen Y as fj2.
veto_Ys = Ys[passveto]
fj1closer = ak.flatten(fj1.delta_r(veto_Ys) <= fj2.delta_r(veto_Ys))

# Same binning for both panels so they can be compared directly.
pt_bin_edges = np.arange(0, 2000, 200)

# NOTE(review): the split is on distance to the gen Y (`Ys`), while the titles say "Higgs-matched" --
# confirm the titles are intended.
plt.title("Higgs-matched w-tagged fatjet")
plt.hist(
    np.concatenate((fj1.pt[fj1closer], fj2.pt[~fj1closer])),
    pt_bin_edges,
    histtype="step",
)
plt.xlabel("FatJet pT")
plt.show()

plt.title("Non-Higgs-matched w-tagged fatjet")
plt.hist(
    np.concatenate((fj1.pt[~fj1closer], fj2.pt[fj1closer])),
    pt_bin_edges,
    histtype="step",
)
plt.xlabel("FatJet pT")
plt.show()

In [None]:
# NOTE(review): bare attribute access that looks like leftover interactive exploration (possibly a
# tab-completion probe for the tau* fields used in the following cells) -- confirm and drop if unneeded.
fatjets.t

In [None]:
# tau2 / tau1 ratio of the two highest-Wqq fatjets, split by which of the pair is closer in dR to
# the gen Y.
fj1t2 = fj1.tau2 / fj1.tau1
fj2t2 = fj2.tau2 / fj2.tau1

# One boolean per vetoed event: True when fj1 is at least as close to the gen Y as fj2.
veto_Ys = Ys[passveto]
fj1closer = ak.flatten(fj1.delta_r(veto_Ys) <= fj2.delta_r(veto_Ys))

# First panel: the closer fatjet of each pair; second panel: the other one.
# NOTE(review): the split is on distance to the gen Y (`Ys`), while the titles say "Higgs-matched" --
# confirm the titles are intended.
for panel_title, pick_fj1 in (
    ("Higgs-matched w-tagged fatjet", fj1closer),
    ("Non-Higgs-matched w-tagged fatjet", ~fj1closer),
):
    plt.title(panel_title)
    plt.hist(np.concatenate((fj1t2[pick_fj1], fj2t2[~pick_fj1])), histtype="step")
    plt.xlabel("tau 2 / 1")
    plt.show()

In [None]:
# tau4 / tau2 ratio of the two highest-Wqq fatjets, split by which of the pair is closer in dR to
# the gen Y.
# NOTE(review): fj1t2 / fj2t2 hold tau4 / tau2 here, overwriting any earlier values stored under
# the same names -- consider distinct names.
fj1t2 = fj1.tau4 / fj1.tau2
fj2t2 = fj2.tau4 / fj2.tau2

# One boolean per vetoed event: True when fj1 is at least as close to the gen Y as fj2.
veto_Ys = Ys[passveto]
fj1closer = ak.flatten(fj1.delta_r(veto_Ys) <= fj2.delta_r(veto_Ys))

# First panel: the closer fatjet of each pair; second panel: the other one.
# NOTE(review): the split is on distance to the gen Y (`Ys`), while the titles say "Higgs-matched" --
# confirm the titles are intended.
for panel_title, pick_fj1 in (
    ("Higgs-matched w-tagged fatjet", fj1closer),
    ("Non-Higgs-matched w-tagged fatjet", ~fj1closer),
):
    plt.title(panel_title)
    plt.hist(np.concatenate((fj1t2[pick_fj1], fj2t2[~pick_fj1])), histtype="step")
    plt.xlabel("tau 4 / 2")
    plt.show()

In [None]:
# Inspect the per-event "fj1 is closer to the gen Y" mask. It is already flattened to one boolean
# per vetoed event where it is built, so it is displayed directly: another ak.flatten with the
# default axis=1 has no nested level left to remove and raises on a one-dimensional array.
fj1closer

In [None]:
# Wqq score of the second-highest-Wqq fatjet: (Xqq + Xcc) / (Xqq + Xcc + QCD).
fj2_wqq = fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc
fj2ws = fj2_wqq / (fj2_wqq + fj2.particleNetMD_QCD)

In [None]:
# Normalised Txbb distribution of the lowest-Wqq fatjet, for events with >= 2 W-tagged fatjets.
# `bins` and `label` are defined here so the cell runs on a fresh kernel instead of depending on
# leftover kernel state.
# NOTE(review): the binning and legend text are placeholders chosen during review (Txbb is a ratio
# in [0, 1]; the label mirrors the mass point in the input file name) -- adjust as needed.
bins = np.linspace(0, 1, 21)
label = "MX-3000, MY-250"

_ = plt.hist(ltxbb[wtg2], bins, histtype="step", label=label, density=True)
plt.xlabel("Txbb of lowest-Wqq fatjet")
plt.legend()
plt.show()