In [1]:
import numpy as np
import awkward as ak
from coffea import processor, util

%matplotlib inline
import matplotlib.pyplot as plt

import boostedhiggs

In [4]:
class HbbProcessor(processor.ProcessorABC):
    def __init__(self, year='2017'):
        self._year = year
        
        self._btagWPs = {
            'med': {
                '2016': 0.6321,
                '2017': 0.4941,
                '2018': 0.4184,
            },
        }
        
        self._accumulator = processor.dict_accumulator({
            "cutflow": processor.defaultdict_accumulator(float),
        })

    @property
    def accumulator(self):
        return self._accumulator

    def process(self, df):
        dataset = df['dataset']
        isRealData = 'genWeight' not in df
        events = boostedhiggs.buildevents(df)
        output = self.accumulator.identity()
        selection = processor.PackedSelection()

        fatjets = events.fatjets
        fatjets['msdcorr'] = boostedhiggs.corrected_msoftdrop(fatjets)
        fatjets['rho'] = 2*np.log(fatjets.msdcorr / fatjets.p4.pt)
        fatjets['n2ddt'] = fatjets.n2 - boostedhiggs.n2ddt_shift(fatjets, year=self._year)
        
        leadingjet = fatjets[:, 0:1]
        selection.add('jetkin', (
            (leadingjet.p4.pt > 450)
            & (leadingjet.p4.eta < 2.4)
            & (leadingjet.msdcorr > 40.)
        ).any())
        selection.add('jetid', (leadingjet.jetId & 2).any())  # tight id
        selection.add('n2ddt', (leadingjet.n2ddt < 0.).any())
        
        # only consider first 4 jets to be consistent with old framework
        jets = events.jets[
            (events.jets.p4.pt > 30.)
            & (events.jets.localindex < 4)
            & (events.jets.jetId & 2)  # tight id
        ]
        ak4_ak8_pair = jets.cross(leadingjet, nested=True)
        dphi = ak4_ak8_pair.i0.p4.delta_phi(ak4_ak8_pair.i1.p4)
        ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
        selection.add('antiak4btagMediumOppHem', ak4_opposite.deepcsvb.max() < self._btagWPs['med'][self._year])
        ak4_away = jets[(np.abs(dphi) > 0.8).all()]
        selection.add('ak4btagMedium08', ak4_away.deepcsvb.max() > self._btagWPs['med'][self._year])
        
        selection.add('met', events.met.rho < 140.)        
        goodmuon = (
            (events.muons.p4.pt > 10)
            & (np.abs(events.muons.p4.eta) < 2.4)
            & (events.muons.pfRelIso04_all < 0.25)
            & (events.muons.looseId).astype(bool)
        )
        nmuons = goodmuon.sum()
        leadingmuon = events.muons[goodmuon][:, 0:1]
        muon_ak8_pair = leadingmuon.cross(leadingjet, nested=True)
        
        nelectrons = (
            (events.electrons.p4.pt > 10)
            & (np.abs(events.electrons.p4.eta) < 2.5)
            & (events.electrons.cutBased & (1<<2)).astype(bool)  # 2017V2 loose
        ).sum()
        
        ntaus = (
            (events.taus.p4.pt > 20)
            & (events.taus.idDecayMode).astype(bool)
            # bacon iso looser than Nano selection
        ).sum()
        
        selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon', (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin', (
            (leadingmuon.p4.pt > 55.)
            & (np.abs(leadingmuon.p4.eta()) < 2.1)
        ).all())
        selection.add('muonDphiAK8', (
            muon_ak8_pair.i0.p4.delta_phi(muon_ak8_pair.i1.p4) > 2*np.pi/3
        ).all().all())
        
        # sets of cuts are stacked as we go through the (ordered) dict
        cutflow = {
            'none': set(),
            'jet kinematics': {'jetkin'},
            'jet id, n2': {'jetid', 'n2ddt'},
            'ttrej': {'antiak4btagMediumOppHem', 'met', 'noleptons'},
        }
        allcuts = set()
        for name, cuts in cutflow.items():
            allcuts.update(cuts)
            output['cutflow'][name] += selection.all(*allcuts).sum()
            
        weights = processor.Weights(len(events))
        if not isRealData:
            weights.add('genweight', events.genWeight)
            boostedhiggs.add_pileup_weight(weights, events.Pileup_nPU, year, dataset)
            bosons = (
                (np.abs(events.genpart.pdgId) >= 21)
                & (np.abs(events.genpart.pdgId) <= 37)
                & (events.genpart.statusFlags & (1<<7)).astype(bool)  # isHardProcess
                & (events.genpart.genPartIdxMother >= 0)
            )
            boostedhiggs.add_VJets_NLOkFactor(weights, genBoson, year, dataset)
        
        return output

    def postprocess(self, accumulator):
        return accumulator

In [5]:
files = {
    'ttbar': [
        'data/TTToHadronic_TuneCP5_13TeV-powheg-pythia8_10X.root'
    ]
}

p = HbbProcessor(year='2018')
exe_config = {
    'flatten': True,
    #'workers': 4,
    'savemetrics': True,
}

from pyinstrument import Profiler
profiler = Profiler()
profiler.start()
output, metrics = processor.run_uproot_job(files, 'Events', p, processor.iterative_executor, exe_config)
profiler.stop()

HBox(children=(IntProgress(value=0, description='Processing', max=7, style=ProgressStyle(description_width='inâ€¦




<pyinstrument.session.ProfilerSession at 0x10e1a4ad0>

In [6]:
evtrate = metrics['entries'].value / metrics['processtime'].value
print("Events / s / thread: %.0f" % evtrate)

Events / s / thread: 44598


In [7]:
with open('profile.html', 'w') as fout:
    fout.write(profiler.output_html())

In [8]:
output

{'cutflow': defaultdict_accumulator(float,
                         {'none': 1374488.0,
                          'jet kinematics': 11873.0,
                          'jet id, n2': 3237.0,
                          'ttrej': 1942.0})}