In [1]:
import numpy as np
import coffea
from coffea import processor,hist,util
from coffea.analysis_tools import Weights, PackedSelection
from boostedhiggs.corrections import (
    corrected_msoftdrop,
)

%matplotlib inline
import matplotlib.pyplot as plt
import awkward1 as ak
import os

# for old pancakes
from coffea.nanoaod.methods import collection_methods, FatJet
collection_methods['CustomAK8Puppi'] = FatJet
collection_methods['CustomAK8PuppiSubjet'] = FatJet
FatJet.subjetmap['CustomAK8Puppi'] = 'CustomAK8PuppiSubjet'

In [2]:
def getBosons(genparticles):
    absid = abs(genparticles.pdgId)
    return genparticles[
        # no gluons
        (absid >= 22)
        & (absid <= 25)
        & genparticles.hasFlags(['fromHardProcess', 'isLastCopy'])
    ]

In [14]:
class HwwProcessorTrigger(processor.ProcessorABC):
    def __init__(self, year):
        self._year = year
        self._btagWPs = {
            'medium': {
                '2016': 0.6321,
                '2017': 0.4941,
                '2018': 0.4184,
            },
        }
        self._metFilters = {
            '2016': [
                "goodVertices",
                "globalSuperTightHalo2016Filter",
                "HBHENoiseFilter",
                "HBHENoiseIsoFilter",
                "EcalDeadCellTriggerPrimitiveFilter",
                "BadPFMuonFilter",
            ],
            '2017': [
                "goodVertices",
                "globalSuperTightHalo2016Filter",
                "HBHENoiseFilter",
                "HBHENoiseIsoFilter",
                "EcalDeadCellTriggerPrimitiveFilter",
                "BadPFMuonFilter",
                "BadChargedCandidateFilter",
                "eeBadScFilter",
                "ecalBadCalibFilter",
            ],
            '2018': [
                "goodVertices",
                "globalSuperTightHalo2016Filter",
                "HBHENoiseFilter",
                "HBHENoiseIsoFilter",
                "EcalDeadCellTriggerPrimitiveFilter",
                "BadPFMuonFilter",
                "BadChargedCandidateFilter",
                "eeBadScFilter",
                "ecalBadCalibFilterV2",
            ],
        }
        self._triggers = {
            '2016_had': [
                "PFHT800",
                "PFHT900",
                "AK8PFJet360_TrimMass30",
                "AK8PFHT700_TrimR0p1PT0p03Mass50",
                "PFHT650_WideJetMJJ950DEtaJJ1p5",
                "PFHT650_WideJetMJJ900DEtaJJ1p5",
                "PFJet450",
            ],
            '2017_had': [
                "PFHT1050",
                "AK8PFJet400_TrimMass30",
                "AK8PFJet420_TrimMass30",
                "AK8PFHT800_TrimMass50",
                "PFJet500",
                "AK8PFJet500",
                #"AK8PFJet330_PFAK8BTagCSV_p17" # not present in 2018D
            ],
            '2016_muon': [
                "Mu50",
                "Mu55",
            ],
            '2017_muon': [
                "Mu50",
            ],
            '2017_vvlmuon':[
                #"Mu15_IsoVVVL_PFHT450_PFMET50",                                                                                                                                                            
                "Mu15_IsoVVVL_PFHT600",
            ],
            '2017_electron': [
                #"Ele20_WPLoose_Gsf",
                #"Ele27_WPTight_Gsf",                                                                                                                                                                                       
                #"Ele35_WPTight_Gsf",
                #"Ele40_WPTight_Gsf",
                "Ele115_CaloIdVT_GsfTrkIdT",
                "Ele50_CaloIdVT_GsfTrkIdT_PFJet165",
            ],
            '2017_vvlelectron': [
                #"Ele15_IsoVVVL_PFHT450_PFMET50",                                                                                                                                                           
                "Ele15_IsoVVVL_PFHT600",
            ],
            '2017_met':[
                "PFMETNoMu110_PFMHTNoMu110_IDTight",
                "PFMETNoMu120_PFMHTNoMu120_IDTight_PFHT60",
                "PFMETNoMu120_PFMHTNoMu120_IDTight",
            ],
        }
        self._triggers['2016_hadhad'] = self._triggers['2016_had']
        self._triggers['2016_hadmu'] = self._triggers['2016_muon']+self._triggers['2016_had']+self._triggers['2017_vvlmuon']
        self._triggers['2016_hadel'] = self._triggers['2017_electron']+self._triggers['2016_had']+self._triggers['2017_vvlelectron']

        self._triggers['2017_hadhad'] = self._triggers['2017_had']
        self._triggers['2017_hadmu'] = self._triggers['2017_muon']+self._triggers['2017_had']+self._triggers['2017_vvlmuon']
        self._triggers['2017_hadel'] = self._triggers['2017_electron']+self._triggers['2017_had']+self._triggers['2017_vvlelectron']

        self._triggers['2018_hadhad'] = self._triggers['2017_had']
        self._triggers['2018_hadmu'] = self._triggers['2017_hadmu']
        self._triggers['2018_hadel'] = self._triggers['2017_hadel']

        self._ref_triggers = {
            'MET': [
                "PFMETNoMu110_PFMHTNoMu110_IDTight",
            ],
            'Mu': [
                "Mu50",
            ],
            'Ele': [
                "Ele50_CaloIdVT_GsfTrkIdT_PFJet165",
                "Ele115_CaloIdVT_GsfTrkIdT",
            ],
            'EleLow':[
                "Ele35_WPTight_Gsf",
            ]   
        }
        # MET ref trigger
        self._ref_triggers['2016_hadhad'] = self._ref_triggers['MET']
        self._ref_triggers['2016_hadmu'] = self._ref_triggers['MET']
        self._ref_triggers['2016_hadel'] = self._ref_triggers['MET']

        self._ref_triggers['2017_hadhad'] = self._ref_triggers['MET']
        self._ref_triggers['2017_hadmu'] = self._ref_triggers['MET']
        self._ref_triggers['2017_hadel'] = self._ref_triggers['MET']

        self._ref_triggers['2018_hadhad'] = self._ref_triggers['MET']
        self._ref_triggers['2018_hadmu'] = self._ref_triggers['MET']
        self._ref_triggers['2018_hadel'] = self._ref_triggers['MET']
        
        # for non-MET ref trigger
        '''
        self._ref_triggers['2016_hadhad'] = self._ref_triggers['Mu']
        self._ref_triggers['2016_hadmu'] = self._ref_triggers['Ele']+self._ref_triggers['EleLow']
        self._ref_triggers['2016_hadel'] = self._ref_triggers['Mu']

        self._ref_triggers['2017_hadhad'] = self._ref_triggers['Mu']
        self._ref_triggers['2017_hadmu'] = self._ref_triggers['Ele']+self._ref_triggers['EleLow']
        self._ref_triggers['2017_hadel'] = self._ref_triggers['Mu']

        self._ref_triggers['2018_hadhad'] = self._ref_triggers['Mu']
        self._ref_triggers['2018_hadmu'] = self._ref_triggers['Ele']
        self._ref_triggers['2018_hadel'] = self._ref_triggers['Mu']   
        '''

        h_pt_bin = hist.Bin('h_pt', r'Higgs $p_{T}$ [GeV]', 10, 200, 700)
        jet_pt_bin = hist.Bin('jet_pt', r'Jet $p_{T}$ [GeV]', 10, 300, 800)
        lep_pt_bin = hist.Bin('lep_pt', r'Lepton $p_{T}$ [GeV]', 10, 20., 140)
        jet_msd_bin = hist.Bin('jet_msd', r'Jet $m_{SD}$ [GeV]', 10, 0., 100.)
        jet_lep_dr_bin = hist.Bin('jet_lep_dr', r'Jet-lepton $\Delta R$', 10, 0., 1.)
        lep_miso_bin = hist.Bin('lep_miso', r'Lepton miniIso', 10, 0., 1.)
        
        self._accumulator = processor.dict_accumulator({
            # dataset -> sumw                                                                                                                                                                               
            'sumw': processor.defaultdict_accumulator(float),
            'trigeff_h': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('region', 'Region'),
                hist.Bin('trig_pass_hadhad', r'Trigger Pass Bit (HadHad)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadel', r'Trigger Pass Bit (HadEl)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadmu', r'Trigger Pass Bit (HadMu)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_ref', r'Trigger Pass Bit (Reference)', 2, -0.5, 1.5),
                h_pt_bin, jet_pt_bin, lep_pt_bin,
            ),
            'trigeff_m': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('region', 'Region'),
                hist.Bin('trig_pass_hadhad', r'Trigger Pass Bit (HadHad)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadel', r'Trigger Pass Bit (HadEl)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadmu', r'Trigger Pass Bit (HadMu)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_ref', r'Trigger Pass Bit (Reference)', 2, -0.5, 1.5),
                jet_msd_bin, jet_pt_bin, lep_pt_bin,
            ),
            'trigeff_dr': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('region', 'Region'),
                hist.Bin('trig_pass_hadhad', r'Trigger Pass Bit (HadHad)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadel', r'Trigger Pass Bit (HadEl)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadmu', r'Trigger Pass Bit (HadMu)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_ref', r'Trigger Pass Bit (Reference)', 2, -0.5, 1.5),
                jet_lep_dr_bin, jet_pt_bin, lep_pt_bin,
            ),
             'trigeff_miso': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('region', 'Region'),
                hist.Bin('trig_pass_hadhad', r'Trigger Pass Bit (HadHad)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadel', r'Trigger Pass Bit (HadEl)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_hadmu', r'Trigger Pass Bit (HadMu)', 2, -0.5, 1.5),
                hist.Bin('trig_pass_ref', r'Trigger Pass Bit (Reference)', 2, -0.5, 1.5),
                lep_miso_bin, jet_pt_bin, lep_pt_bin,
            ),
        })
            
    @property
    def accumulator(self):
        return self._accumulator
    
    def process(self, events):
        dataset = events.metadata['dataset']
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events))
        output = self.accumulator.identity()
        if not isRealData:
            output['sumw'][dataset] += ak.sum(events.genWeight)
        
        met_filters = np.ones(events.size, dtype='bool')
        for t in self._metFilters[self._year]:
            met_filters = met_filters & events.Flag[t]
        selection.add('met_filters', met_filters)
        
        trigger_ref = {}
        trigger_ref["hadhad"] = np.zeros(events.size, dtype='bool')
        trigger_ref["hadel"] = np.zeros(events.size, dtype='bool')
        trigger_ref["hadmu"] = np.zeros(events.size, dtype='bool')
        for t in self._ref_triggers[self._year+"_hadhad"]:
            trigger_ref["hadhad"] = trigger_ref["hadhad"] | events.HLT[t]
        for t in self._ref_triggers[self._year+"_hadel"]:
            trigger_ref["hadel"] = trigger_ref["hadel"] | events.HLT[t]
        for t in self._ref_triggers[self._year+"_hadmu"]:
            trigger_ref["hadmu"] = trigger_ref["hadmu"] | events.HLT[t]
            
        trigger_hadhad = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year+"_hadhad"]:
            trigger_hadhad = trigger_hadhad | events.HLT[t]
        if isRealData:
            selection.add('hadhad_trigger', trigger_ref["hadhad"])
        else:
            selection.add('hadhad_trigger', np.ones(events.size, dtype='bool'))
            
        trigger_hadmu = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year+"_hadmu"]:
            trigger_hadmu = trigger_hadmu | events.HLT[t]
        if isRealData:
            selection.add('hadmu_trigger', trigger_ref["hadmu"])
        else:
            selection.add('hadmu_trigger', np.ones(events.size, dtype='bool'))

        trigger_hadel = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year+"_hadel"]:
            trigger_hadel = trigger_hadel | events.HLT[t]
        if isRealData:
            selection.add('hadel_trigger', trigger_ref["hadel"])
        else:
            selection.add('hadel_trigger', np.ones(events.size, dtype='bool'))
        
        # leptons for triggering
        goodmuon = (
            #(events.Muon.pt > 10)
            (events.Muon.pt > 55)
            & (np.abs(events.Muon.eta) < 2.4)
            & (events.Muon.tightId).astype(bool)
        )
        ngoodmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])
        
        # electron mask
        #el_tight_cuts = [(np.bitwise_and(np.right_shift(events.Electron.vidNestedWPBitmap,events.Electron.vidNestedWPBitmap.ones_like()*(3*k)),events.Electron.vidNestedWPBitmap.ones_like()*7) >= events.Electron.TIGHT) for k in range(10) if k != 7]
        #elmask_tight = el_tight_cuts[0].ones_like().astype(bool)
        #for m in el_tight_cuts: elmask_tight = elmask_tight & m
        
        goodelectron = (
            (events.Electron.pt > 55)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.TIGHT) 
            # & elmask_tight
        )
        print(goodelectron.shape,events.shape)
        ngoodelectrons = ak.sum(goodelectron, axis=1)
        leadingelectron = ak.firsts(events.Electron[goodelectron])
        print(ak.firsts(events.Electron[events.Electron.pt>100]).eta)
        
        # candidate lep by channel
        candidatelep = {}
        candidatelep["hadhad"] = leadingmuon
        candidatelep["hadel"] = leadingelectron
        candidatelep["hadmu"] = leadingmuon
        
        print('ngood',ngoodelectrons,ngoodmuons)
        print('ele ',leadingelectron)
        print('mu ',leadingmuon)
            
        # for veto purposes
        nmuons = ak.sum(
            (events.Muon.pt > 15)
            & (abs(events.Muon.eta) < 2.4)
            & (events.Muon.looseId).astype(bool),
            axis = 1,
        )
        nelectrons = ak.sum(
            (events.Electron.pt > 15)
            & (abs(events.Electron.eta) < 2.5)
            & (events.Electron.cutBased >= events.Electron.VETO),
            axis = 1,
        )
        ntaus = ak.sum(
            (events.Tau.pt > 20)
            & events.Tau.idDecayMode,
            axis=1,
        )
        
        selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon', (ngoodmuons == 1) & (ntaus == 0))
        selection.add('oneelectron', (ngoodelectrons == 1) & (ntaus == 0))
        
        # jets
        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        candidatejets = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]
    
        # candidate jet by channel
        candidatejet = {}
        candidatejet['hadhad'] = ak.fill_none(ak.firsts(candidatejets),0)
        print(candidatejets)
        print(candidatelep['hadel'])
        print(leadingelectron)
        print(ak.cartesian([candidatejets, candidatelep['hadel']], axis=1))
        jetlep_pairs = ak.cartesian([candidatejets, candidatelep['hadel']], axis=1)
        dr = jetlep_pairs
        j, l = ak.unzip(jetlep_pairs)
        
        candidatejet['hadel'] = candidatejets[ak.argmin(candidatejets.delta_r(candidatelep['hadel']))]
        candidatejet['hadmu'] = candidatejets[ak.argmin(candidatejets.delta_r(candidatelep['hadmu']))]
        
        selection.add('hadhad_jetacceptance', (candidatejet['hadhad'].pt > 300) & (abs(candidatejet['hadhad'].eta) < 2.4))
        selection.add('hadel_jetacceptance', (candidatejet['hadel'].pt > 300) & (abs(candidatejet['hadel'].eta) < 2.4))
        selection.add('hadmu_jetacceptance', (candidatejet['hadmu'].pt > 300) & (abs(candidatejet['hadmu'].eta) < 2.4))

        # gen info
        if isRealData:
            genBosonPt = 0
        else:
            weights.add('genweight', events.genWeight)
            bosons = getBosons(events)
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
                
        # should add later a MET cut
        regions = {
            'hadhad_signal': ['met_filters','hadhad_jetacceptance', 'hadhad_trigger', 'noleptons'],
            'hadmu_signal': ['met_filters','hadmu_jetacceptance', 'hadmu_trigger', 'onemuon'],
            'hadel_signal': ['met_filters','hadel_jetacceptance', 'hadel_trigger', 'oneelectron'],
        }

        def normalize(val, cut):
            return ak.to_numpy(ak.fill_none(val[cut], np.nan))

        def fill(region, systematic, wmod=None):
            if (region.startswith("hadhad")): chan = "hadhad"
            elif (region.startswith("hadel")): chan = "hadel"
            elif (region.startswith("hadmu")): chan = "hadmu"
            else: chan = ""
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                weight = weights.weight(modifier=systematic)[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]
        
            output['trigeff_h'].fill(
                dataset=dataset,
                region=region,
                trig_pass_hadhad=trigger_hadhad[cut],
                trig_pass_hadel=trigger_hadel[cut],
                trig_pass_hadmu=trigger_hadmu[cut],
                trig_pass_ref=trigger_ref[chan][cut],
                h_pt=normalize(genBosonPt,cut),
                jet_pt=normalize(candidatejet[chan].pt,cut),
                lep_pt=normalize(candidatelep[chan].pt,cut),
                # weight=weight,
            )
            
            output['trigeff_m'].fill(
                dataset=dataset,
                region=region,
                trig_pass_hadhad=trigger_hadhad[cut],
                trig_pass_hadel=trigger_hadel[cut],
                trig_pass_hadmu=trigger_hadmu[cut],
                trig_pass_ref=trigger_ref[chan][cut],
                jet_msd=normalize(candidatejet[chan].msdcorr,cut),
                jet_pt=normalize(candidatejet[chan].pt,cut),
                lep_pt=normalize(candidatelep[chan].pt,cut),
                #weight=weight,
            )
            
            output['trigeff_dr'].fill(
                dataset=dataset,
                region=region,
                trig_pass_hadhad=trigger_hadhad[cut],
                trig_pass_hadel=trigger_hadel[cut],
                trig_pass_hadmu=trigger_hadmu[cut],
                trig_pass_ref=trigger_ref[chan][cut],
                jet_lep_dr=normalize(candidatejet[chan].delta_r(candidatelep['hadel']),cut),
                jet_pt=normalize(candidatejet[chan].pt,cut),
                lep_pt=normalize(candidatelep[chan].pt,cut),
                #weight=weight,
            )

            output['trigeff_miso'].fill(
                dataset=dataset,
                region=region,
                trig_pass_hadhad=trigger_hadhad[cut],
                trig_pass_hadel=trigger_hadel[cut],
                trig_pass_hadmu=trigger_hadmu[cut],
                trig_pass_ref=trigger_ref[chan][cut],
                lep_miso=normalize(candidatelep[chan].miniPFRelIso_all,cut),
                jet_pt=normalize(candidatejet[chan].pt,cut),
                lep_pt=normalize(candidatelep[chan].pt,cut),
                #weight=weight,
            )
        
        systematics = [None]
        for region in regions:
            for systematic in systematics:
                fill(region, systematic)
                
        return output
        
        
    def postprocess(self, accumulator):
        return accumulator

    

In [15]:
files = {
    'HToWW_private': [
        'data/hww_2017mc_ULnano_RunIISummer19UL17MiniAOD_Feb10.root'
    ]
}

p = HwwProcessorTrigger(year='2017')
args = {
    #'flatten': True,
    'workers': 1,
    'savemetrics': True,
    #'compression': 0,
    'nano': True,
}

from pyinstrument import Profiler
profiler = Profiler()
profiler.start()
# processor.iterative_executor
out, metrics = processor.run_uproot_job(files, 'Events', p, processor.iterative_executor, args)
profiler.stop()

HBox(children=(HTML(value='Processing'), FloatProgress(value=0.0, max=3.0), HTML(value='')))

(94295,) (94295,)



AttributeError: no field named 'eta'

(https://github.com/scikit-hep/awkward-1.0/blob/0.3.1/src/awkward1/highlevel.py#L1095)