In [2]:
import uproot
import awkward as ak
from coffea import nanoevents
from coffea.nanoevents.methods.base import NanoEventsArray
from coffea.analysis_tools import PackedSelection

from coffea.nanoevents.methods import vector

ak.behavior.update(vector.behavior)

import pickle
import numpy as np

from typing import Optional, List, Dict
from copy import copy

import matplotlib.pyplot as plt
import mplhep as hep
from matplotlib import colors

from tqdm import tqdm
import fastjet
import jetnet

import os


In [3]:
plot_dir = "../../../plots/ScaleFactors/Nov14/"
_ = os.system(f"mkdir -p {plot_dir}")


In [4]:
events = nanoevents.NanoEventsFactory.from_root(
    # "/eos/uscms/store/user/lpcpfnano/cmantill/v2_3/2017/HH_gen/GluGluToHHTobbVV_node_cHHH1_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH1/221017_221918/0000/nano_mc2017_100.root",
    "/eos/uscms/store/user/lpcpfnano/drankin/v2_2/2017/TTbar/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/TTToSemiLeptonic_ext1/211112_132937/0000/nano_mc2017_100.root",
    schemaclass=nanoevents.NanoAODSchema,
).events()




### Boosted Top Pre-selection

Based on selection in https://indico.cern.ch/event/1208247/#10-lund-plane-reweighting-for

In [5]:
def pad_val(
    arr: ak.Array,
    target: int,
    value: float,
    axis: int = 0,
    to_numpy: bool = True,
    clip: bool = True,
):
    """
    pads awkward array up to ``target`` index along axis ``axis`` with value ``value``,
    optionally converts to numpy array
    """
    ret = ak.fill_none(ak.pad_none(arr, target, axis=axis, clip=clip), value, axis=axis)
    return ret.to_numpy() if to_numpy else ret


def add_selection(
    name: str,
    sel: np.ndarray,
    selection: PackedSelection,
    cutflow: dict = None,
    isData: bool = False,
    signGenWeights: ak.Array = None,
):
    """adds selection to PackedSelection object and the cutflow dictionary"""
    selection.add(name, sel)
    if cutflow is not None:
        cutflow[name] = (
            np.sum(selection.all(*selection.names))
            if isData
            # add up sign of genWeights for MC
            else np.sum(signGenWeights[selection.all(*selection.names)])
        )


preselection_cut_vals = {"pt": 250, "msd": 20}
num_jets = 1

preselection_cut = np.prod(
    pad_val(
        (events.FatJet.pt > preselection_cut_vals["pt"])
        * (events.FatJet.msoftdrop > preselection_cut_vals["msd"]),
        num_jets,
        False,
        axis=1,
    ),
    axis=1,
).astype(bool)

presel_events = events[preselection_cut]


In [6]:
isData = False
signGenWeights = None if isData else np.sign(presel_events["genWeight"])
n_events = len(presel_events) if isData else int(np.sum(signGenWeights))
selection = PackedSelection()

cutflow = {}
cutflow["presel"] = len(presel_events)


In [7]:
d_PDGID = 1
b_PDGID = 5
g_PDGID = 21
TOP_PDGID = 6

ELE_PDGID = 11
vELE_PDGID = 12
MU_PDGID = 13
vMU_PDGID = 14
TAU_PDGID = 15
vTAU_PDGID = 16

Z_PDGID = 23
W_PDGID = 24
HIGGS_PDGID = 25

b_PDGIDS = [511, 521, 523]

GRAV_PDGID = 39

GEN_FLAGS = ["fromHardProcess", "isLastCopy"]

FILL_NONE_VALUE = -99999


skim_vars = {
    "eta": "eta",
    "phi": "phi",
    "mass": "mass",
    "pt": "pt",
}

# finding the two gen tops
tops = presel_events.GenPart[
    (abs(presel_events.GenPart.pdgId) == TOP_PDGID) * presel_events.GenPart.hasFlags(GEN_FLAGS)
]


In [8]:
deltaR = 0.8


In [9]:
tops_children = tops.distinctChildren
tops_children = tops_children[tops_children.hasFlags(GEN_FLAGS)]
ws = ak.flatten(tops_children[np.abs(tops_children.pdgId) == W_PDGID], axis=2)
had_top_sel = np.all(np.abs(ws.children.pdgId) <= 5, axis=2)
had_ws = ak.flatten(ws[had_top_sel])
had_tops = ak.flatten(tops[had_top_sel])
had_top_children = ak.flatten(tops_children[had_top_sel], axis=1)
had_bs = had_top_children[np.abs(had_top_children.pdgId) == 5]
add_selection(
    "hadronic bs", np.any(had_bs.pdgId, axis=1), selection, cutflow, isData, signGenWeights
)


In [10]:
had_w_jet_match = ak.all(had_ws.children.delta_r(presel_events.FatJet[:, 0]) < deltaR, axis=1)
had_b_jet_match = ak.flatten(
    pad_val(had_bs.delta_r(presel_events.FatJet[:, 0]) < deltaR, 1, False, axis=1, to_numpy=False)
)
merged_top_jet_match = had_w_jet_match * had_b_jet_match


In [11]:
had_top_jet_match = presel_events.FatJet[:, 0].delta_r(hadronic_tops) < deltaR
add_selection("jet not matched", ~had_top_jet_match, unmatched_selection)


NameError: name 'hadronic_tops' is not defined

In [12]:
merged_top_events = presel_events[merged_top_jet_match]
had_top_jets = merged_top_events.FatJet[:, 0]

merged_ak8_pfcands = merged_top_events.FatJetPFCands
merged_ak8_pfcands = merged_ak8_pfcands[merged_ak8_pfcands.jetIdx == 0]
merged_pfcands = merged_top_events.PFCands[merged_ak8_pfcands.pFCandsIdx]


In [72]:
merged_pfcands_vector_ptetaphi = ak.Array(
    [
        [{kin_key: cand[kin_key] for kin_key in skim_vars} for cand in event_cands]
        for event_cands in merged_pfcands
    ],
    with_name="PtEtaPhiMLorentzVector",
)


In [14]:
# jet definitions
dR = 0.8
cadef = fastjet.JetDefinition(fastjet.cambridge_algorithm, dR)
ktdef = fastjet.JetDefinition(fastjet.kt_algorithm, dR)


In [110]:
# cluster first with kT
kt_clustering = fastjet.ClusterSequence(merged_pfcands_vector_ptetaphi, ktdef)
kt_subjet_consts = kt_clustering.exclusive_jets_constituents(2)

# then re-cluster with CA


In [140]:
# ca_clustering = fastjet.ClusterSequence(ak.flatten(kt_subjet_consts, axis=1), cadef)
ca_clustering = fastjet.ClusterSequence(kt_subjet_consts, cadef)

In [115]:
ak.flatten(kt_subjet_consts, axis=1)

<PtEtaPhiMLorentzVectorArray [[{eta: 0.0674, ... pt: 0.337}]] type='2128 * var *...'>

In [136]:
lds = ca_clustering.exclusive_jets_lund_declusterings(1)

In [139]:
lds[0][0]

<Array [{Delta: 0.497, ... kt: 0.149}] type='11 * {"Delta": float64, "kt": float64}'>

In [150]:
ca_clustering.exclusive_jets_lund_declusterings(1)

ValueError: in ListOffsetArray64 attempting to get 0, offsets[i] != offsets[i + 1] and offsets[i + 1] > len(content)

(https://github.com/scikit-hep/awkward-1.0/blob/1.10.2/src/libawkward/array/ListOffsetArray.cpp#L713)

In [135]:
kt_subjet_consts[0][1]

<PtEtaPhiMLorentzVectorArray [{eta: 0.752, ... pt: 0.948}] type='29 * PtEtaPhiML...'>

In [133]:
ca_subjet_consts[1][0]

<PtEtaPhiMLorentzVectorArray [{eta: 0.752, ... pt: 0.948}] type='29 * PtEtaPhiML...'>

In [121]:
lds[0][1]

<Array [{Delta: 0.371, ... kt: 0.149}] type='10 * {"Delta": float64, "kt": float64}'>

In [99]:
(ak.count(subjet_consts.px, axis=1) == 2)

<Array [True, True, True, ... False, False] type='29 * bool'>

In [109]:
np.sum(ak.count(ak.count(subjet_consts.px, axis=2), axis=1) == 2)

1064