In [1]:
import json
import uproot
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, BaseSchema
from coffea import nanoevents
from coffea import processor
import time

import argparse
import warnings
import pyarrow as pa
import pyarrow.parquet as pq
import pickle as pkl
import pandas as pd
import os
import sys
sys.path.append("../")

from collections import defaultdict
import pickle as pkl
import pyarrow as pa
import awkward as ak
import numpy as np
import pandas as pd
import json
import os
import shutil
import pathlib
from typing import List, Optional
import pyarrow.parquet as pq

from coffea import processor
from coffea.nanoevents.methods import candidate, vector
from coffea.analysis_tools import Weights, PackedSelection
from boostedhiggs.utils import match_HWW
from boostedhiggs.btag import btagWPs
from boostedhiggs.btag import BTagCorrector

import warnings
# Silence warnings whose message starts with "Found duplicate branch " and all
# DeprecationWarnings so they do not clutter the cell outputs.
warnings.filterwarnings("ignore", message="Found duplicate branch ")
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Ignore numpy "invalid value" floating-point errors. np.seterr returns the
# previous settings, which is the dict this cell displays as its output.
np.seterr(invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
# Open the input ROOT file and expose its "Events" tree through coffea.
import uproot  # also imported in the first cell

f = uproot.open("../datafiles/DYJetsToLL_Pt-50To100.root")

# Report how many entries the "Events" tree of this file holds.
num = f["Events"].num_entries
print(f'number of events per file is {num}')

# Build the coffea events object from the already-open file's "Events" tree.
events = nanoevents.NanoEventsFactory.from_root(f, "Events").events()

number of events per file is 96402


In [191]:
# Lepton selection: per-lepton boolean masks built from a pt threshold only
# (electrons above 30, muons above 28).
good_electrons = events.Electron.pt > 30
good_muons = events.Muon.pt > 28

# Display the electron mask (one variable-length list of booleans per event).
good_electrons

<Array [[True, False, False, ... False, False]] type='96402 * var * bool'>

In [194]:
# Merge the selected muons and electrons into a single per-event lepton list.
# axis=1 joins the lists inside each event; muons come first, then electrons,
# so the result is not re-sorted by pt.
goodleptons = ak.concatenate([events.Muon[good_muons], events.Electron[good_electrons]], axis=1)
goodleptons

<Array [[Electron], ... Electron, Electron]] type='96402 * var * union[muon, ele...'>

In [196]:
# pt of every selected lepton, grouped per event (empty list when none pass).
goodleptons.pt

<Array [[82.8], [], [32.3, ... [], [70.9, 68]] type='96402 * var * float32'>

In [193]:
# Take the first selected lepton of each event (None when the event has none).
# NOTE(review): goodleptons is "muons, then electrons", so this is the first
# muon whenever one exists, not necessarily the highest-pt lepton. Confirm
# that is intended, or sort goodleptons by pt before taking the first entry.
candidatelep = ak.firsts(goodleptons)
candidatelep

<Array [Electron, None, ... None, Electron] type='96402 * ?union[muon, electron]'>

In [127]:
def build_p4(cand):
    """Repackage ``cand`` as a ``PtEtaPhiMCandidate`` record array.

    The ``pt``, ``eta``, ``phi``, ``mass`` and ``charge`` fields of ``cand``
    are copied into a freshly zipped array that carries coffea's candidate
    behavior.
    """
    kinematics = {
        field: getattr(cand, field)
        for field in ("pt", "eta", "phi", "mass", "charge")
    }
    return ak.zip(kinematics, with_name="PtEtaPhiMCandidate", behavior=candidate.behavior)

# Re-wrap the candidate lepton as a PtEtaPhiMCandidate record; the next cells
# call delta_r on it.
candidatelep_p4 = build_p4(candidatelep)
candidatelep_p4

<PtEtaPhiMCandidateArray [{pt: 82.8, eta: -1.06, ... charge: 1}] type='96402 * P...'>

In [156]:
# Jet selection: keep only the jets with pt > 60 in each event.
good_jets = events.Jet[(events.Jet.pt > 60)]
good_jets

<JetArray [[Jet], [], [Jet, ... [Jet], [Jet, Jet]] type='96402 * var * jet'>

In [160]:
# delta-R between the candidate lepton and every selected jet of the same event.
# Events without a candidate lepton come out as None (see the displayed result).
candidatelep_jets_dr = candidatelep_p4.delta_r(good_jets)
candidatelep_jets_dr

<Array [[0.00321], None, ... [0.0328, 3.78]] type='96402 * option[var * float32]'>

In [162]:
# Get the selected jet with the least delta-R to the candidate lepton.
# keepdims=True keeps the argmin as a length-1 list per event so it can be used
# to index good_jets; ak.firsts then unwraps that single jet (None if absent).
goodjet_lep = ak.firsts(good_jets[ak.argmin(candidatelep_jets_dr, axis=1, keepdims=True)])
goodjet_lep

<JetArray [Jet, None, None, ... Jet, None, Jet] type='96402 * ?jet'>

In [161]:
# Leading-pt jet: the first selected jet of each event (None when there is none).
# NOTE(review): this is the highest-pt jet only if events.Jet is stored in
# descending pt order - confirm for this input.
candidate_jet = ak.firsts(good_jets)
candidate_jet

<JetArray [Jet, None, Jet, Jet, ... Jet, Jet, Jet] type='96402 * ?jet'>

In [140]:
# Second selected jet of each event. Padding every event to at least two
# entries makes index 1 always valid; it is None when fewer than two jets pass.
# NOTE(review): "second leading" relies on the same pt-ordering assumption as
# the leading jet - confirm.
secondfj = ak.pad_none(good_jets, 2, axis=1)[:, 1]
secondfj

<JetArray [None, None, Jet, ... None, None, Jet] type='96402 * ?jet'>

In [169]:
# Select every jet of the event (all of events.Jet, not only good_jets) that is
# more than delta-R 0.8 away from the candidate jet. The candidate jet itself is
# part of events.Jet at delta-R 0, so it is excluded by the cut.
# Events without a candidate jet yield None instead of a list (see the output).
jet = events.Jet
jets_outside = jet[jet.delta_r(candidate_jet) > 0.8]
# b-tag discriminator (btagDeepFlavB branch) of those jets
jets_outside_btag = jets_outside.btagDeepFlavB
print('jets_outside_btag', jets_outside_btag)

jets_outside_btag [[0.56, 0.077, 0.231], None, ... 0.0359, 0.0547, 0.0536, 0.0419, 0.0649, 0.184]]


In [170]:
# Highest b-tag score among the "outside" jets of each event.
# The result is None for events that have no candidate jet.
jets_outside_btag_max = ak.max(jets_outside_btag, axis=1)
jets_outside_btag_max

<Array [0.56, None, 0.0671, ... 0.171, 0.184] type='96402 * ?float32'>

In [165]:
def pad_val(
    arr: ak.Array,
    value: float,
    target: Optional[int] = None,
    axis: int = 0,
    to_numpy: bool = False,
    clip: bool = True,
):
    """
    Replace every missing (None) entry of ``arr`` with ``value``, optionally
    padding the array first. The nested structure of ``arr`` is preserved.

    When ``target`` is given, ``arr`` is first padded with None along ``axis``
    up to length ``target`` via ``ak.pad_none``; those padding slots are then
    filled with ``value`` like any other None.

    Args:
        arr: awkward array to pad and fill.
        value: replacement for None entries (e.g. ``-1``).
        target: length to pad to along ``axis``; ``None`` (default) skips padding.
        axis: axis forwarded to ``ak.pad_none``.
        to_numpy: if True, return ``ret.to_numpy()`` instead of the awkward array.
        clip: forwarded to ``ak.pad_none`` (with ``clip=True`` the padded axis is
            also truncated to exactly ``target`` entries).

    Returns:
        The filled awkward array, or its numpy conversion when ``to_numpy`` is True.
    """
    # Compare against None explicitly: a plain truthiness test would treat a
    # legitimate ``target=0`` as "no padding requested" and skip pad_none.
    if target is not None:
        arr = ak.pad_none(arr, target, axis=axis, clip=clip)
    # axis=None fills missing values at every nesting level.
    ret = ak.fill_none(arr, value, axis=None)
    return ret.to_numpy() if to_numpy else ret

In [176]:
# Replace the None entries (events with no candidate jet) with -1.
pad_val(jets_outside_btag_max, -1)

<Array [0.56, -1, 0.0671, ... 0.171, 0.184] type='96402 * float64'>

In [181]:
# axis=None flattens every level of nesting into one 1-D array; the displayed
# result is plain float32, i.e. the per-event None entries are gone as well.
flat = ak.flatten(jets_outside_btag, axis=None)
flat

<Array [0.56, 0.077, 0.231, ... 0.0649, 0.184] type='345471 * float32'>