In [1]:
import time

from coffea import hist, util
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from functools import partial
import uproot

from awkward import JaggedArray
import numpy as np
import pickle

from coffea.lookup_tools import extractor, dense_lookup

import matplotlib.pyplot as plt
from matplotlib import ticker,colors

In [2]:
from utils.crossSections import crossSections, lumis
from utils.efficiencies import getMuSF, getEleSF


In [3]:
with open('utils/taggingEfficienciesDenseLookup.pkl','rb') as _file:
    taggingEffLookup = pickle.load(_file)

In [4]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class TTGammaProcessor(processor.ProcessorABC):
#     def __init__(self, runNum = -1, eventNum = -1):
    def __init__(self, runNum = -1, eventNum = -1, mcSumWeights = None):
        dataset_axis = hist.Cat("dataset", "Dataset")
        year_axis = hist.Cat("year",r"Year")
        
        Npho_axis = hist.Bin("nPho", r"$Number of Photons$", 3, 0., 3)
        Nbjet_axis = hist.Bin("nBjet", r"BJet Multiplicity", 3, 0, 3)
        lep_axis = hist.Bin("lepFlavor", r"ElectronOrMuon", 3, -1.5, 1.5)
        
        m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 1000, 0., 1000)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
        genpt_axis = hist.Bin("genpt", r"$Gen p_{T,\gamma}$ [GeV]", 200, 0., 1000)
        eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
        geneta_axis = hist.Bin("geneta", r"$Gen \eta_{\gamma}$", 300, -1.5, 1.5)
        sieie_axis = hist.Bin("sieie", r"$\sigma_{i\eta i\eta}$", 100, 0., 0.1)
        chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation", 200, 0., 20.)
        dR_axis = hist.Bin("dR", r"$\Delta R$", 600, 0., 6)
        mult_axis = hist.Bin("N", r"Multiplicity", 10, 0, 10)

        ## Define axis to keep track of photon category
        phoCategory_axis = hist.Bin("category", r"Photon Category", [1,2,3,4,5])
        phoCategory_axis.identifiers()[0].label = "Genuine Photon"    
        phoCategory_axis.identifiers()[1].label = "Misidentified Electron"    
        phoCategory_axis.identifiers()[2].label = "Hadronic Photon"    
        phoCategory_axis.identifiers()[3].label = "Hadronic Fake"    
        
        ###
        self._accumulator = processor.dict_accumulator({
            ##photon histograms
            'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis, year_axis,lep_axis),
            'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis, year_axis,lep_axis),
            'photon_sieie': hist.Hist("Counts", dataset_axis, sieie_axis, phoCategory_axis, year_axis,lep_axis),
            'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, year_axis,lep_axis),
            'dR_lep_gamma': hist.Hist("Counts", dataset_axis, dR_axis,phoCategory_axis, year_axis,lep_axis),

            ##photon reco vs gen histograms
            'photon_RecoVsGenPt': hist.Hist("Counts", dataset_axis, pt_axis,genpt_axis, phoCategory_axis, year_axis,lep_axis),
            'photon_RecoVsGenEta': hist.Hist("Counts", dataset_axis, eta_axis,geneta_axis, phoCategory_axis, year_axis,lep_axis),

            ##jet histograms (split by nBjet, nPhoton, and lepton flavor)
            'pt_jet': hist.Hist("Counts", dataset_axis, pt_axis, year_axis,Npho_axis,Nbjet_axis,lep_axis),
            'jet_mult': hist.Hist("Counts", dataset_axis, mult_axis, year_axis,Npho_axis,Nbjet_axis,lep_axis),
            'M3': hist.Hist("Counts", dataset_axis, m3_axis, year_axis,Npho_axis,Nbjet_axis,lep_axis),

        })

        self.eventNum = eventNum
        self.runNum = runNum
        self.mcSumWeights = mcSumWeights
        
    @property
    def accumulator(self):
        return self._accumulator

    def process(self, df):
        output = self.accumulator.identity()

        datasetFull = df['dataset']
        if '2016' in datasetFull:
            year=2016
            yearStr="2016"
            muonTrigger = df['HLT_IsoMu24'] | df['HLT_IsoTkMu24']
            eleTrigger = df['HLT_Ele27_WPTight_Gsf']
            photonBitMapName = 'Photon_cutBased'
            btagSF = 'test_DeepCSV_2016LegacySF_V1.btag.csv'
            dataset=datasetFull.replace('_2016','')
        elif '2017' in datasetFull:
            year=2017
            yearStr="2017"
            muonTrigger = df['HLT_IsoMu27']
            eleTrigger = df['HLT_Ele32_WPTight_Gsf_L1DoubleEG'] | df['HLT_Ele32_WPTight_Gsf']
            photonBitMapName = 'Photon_cutBasedBitmap'
            btagSF = 'test_DeepCSV_102XSF_V1.btag.csv'
            dataset=datasetFull.replace('_2017','')
        elif '2018' in datasetFull:
            year=2018
            yearStr="2018"
            muonTrigger = df['HLT_IsoMu24']
            eleTrigger = df['HLT_Ele32_WPTight_Gsf']
            photonBitMapName = 'Photon_cutBasedBitmap'
            btagSF = 'test_DeepCSV_102XSF_V1.btag.csv'
            dataset=datasetFull.replace('_2018','')

        if dataset in ['SingleElectron', 'SingleMuon', 'EGamma']:
            isRealData = True
        else:
            isRealData = False
            mcSumWeight = self.mcSumWeights[datasetFull]
            xsec = crossSections[dataset]
        
        
        filters = (df['Flag_goodVertices'] &
                   df['Flag_globalSuperTightHalo2016Filter'] &
                   df['Flag_HBHENoiseFilter'] &
                   df['Flag_HBHENoiseIsoFilter'] &
                   df['Flag_EcalDeadCellTriggerPrimitiveFilter'] &
                   df['Flag_BadPFMuonFilter'] 
                  )
        if year > 2016:
            filters = (filters & 
                       df['Flag_ecalBadCalibFilterV2']
                      )
        
        
        
        muons = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
            charge=df['Muon_charge'],
            relIso=df['Muon_pfRelIso04_all'],
            tightId=df['Muon_tightId'],
            isPFcand=df['Muon_isPFcand'],
            isTracker=df['Muon_isTracker'],
            isGlobal=df['Muon_isGlobal'],           
        )
        
        electrons = JaggedCandidateArray.candidatesfromcounts(
            df['nElectron'],
            pt=df['Electron_pt'],
            eta=df['Electron_eta'],
            phi=df['Electron_phi'],
            mass=df['Electron_mass'],
            charge=df['Electron_charge'],
            cutBased=df['Electron_cutBased'],
            d0=df['Electron_dxy'],
            dz=df['Electron_dz'],
        )

        jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'],
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=df['Jet_mass'],
            jetId=df['Jet_jetId'],
            btag=df['Jet_btagDeepB'],
            hadFlav=df['Jet_hadronFlavour'],
            genIdx=df['Jet_genJetIdx'],
        )

        photons = JaggedCandidateArray.candidatesfromcounts(
            df['nPhoton'],
            pt=df['Photon_pt'],
            eta=df['Photon_eta'],
            phi=df['Photon_phi'],
            mass=np.zeros_like(df['Photon_pt']),
            isEE=df['Photon_isScEtaEE'],
            isEB=df['Photon_isScEtaEB'],
            photonId=df[photonBitMapName],
            passEleVeto=df['Photon_electronVeto'],
            pixelSeed=df['Photon_pixelSeed'],
            #genFlav=df['Photon_genPartFlav'],
            genIdx=df['Photon_genPartIdx'],
            sieie=df['Photon_sieie'],
            chIso=df['Photon_pfRelIso03_chg'],
            vidCuts=df['Photon_vidNestedWPBitmap'],
        )
            
        genPart = JaggedCandidateArray.candidatesfromcounts(
            df['nGenPart'],
            pt=df['GenPart_pt'],
            eta=df['GenPart_eta'],
            phi=df['GenPart_phi'],
            mass=df['GenPart_mass'],
            pdgid=df['GenPart_pdgId'],
            motherIdx=df['GenPart_genPartIdxMother'],
            status=df['GenPart_status'],
            statusFlags=df['GenPart_statusFlags'],
        )


        ## TTbar vs TTGamma Overlap Removal (work in progress, still buggy)
#         overlapRemoval = np.ones_like(df['event'])
#         if 'TTbar' in dataset:
#             overlapPhoSelect = ((genPart.pt>=10) & 
#                                 (abs(genPart.eta) < 5.) & 
#                                 (genPart.pdgid==22) & 
#                                 (genPart.status==1)
#                                )
            
#             OverlapPhotons = genPart[overlapPhoSelect] 

#             phoParent = OverlapPhotons.motherIdx
#             phoParent = phoParent[phoParent>-1]
#             isNonPrompt = (genPart[phoParent].pdgid>25).any()

#             while (phoParent>-1).any().sum()>0:
#                 phoParent = phoParent[phoParent>-1]
#                 isNonPrompt = isNonPrompt | (genPart[phoParent].pdgid>37).any()
#                 phoParent = genPart[phoParent].motherIdx        
#             ### Add in min DR cut
            

        
        
        muonSelectTight = ((muons.pt>30) & 
                           (abs(muons.eta)<2.4) & 
                           (muons.tightId) & 
                           (muons.relIso < 0.15)
                          )
        
        muonSelectLoose = ((muons.pt>15) & 
                           (abs(muons.eta)<2.4) & 
                           ((muons.isPFcand) & (muons.isTracker | muons.isGlobal)) & 
                           (muons.relIso < 0.25) &
                           np.invert(muonSelectTight)
                          )

        eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) > 1.566)
        elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05) |
                     (abs(electrons.eta) > 1.479)  & (abs(electrons.d0) < 0.1)
                    )
        elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                     (abs(electrons.eta) > 1.479)  & (abs(electrons.dz) < 0.2)
                    )

        
        
        electronSelectTight = ((electrons.pt>35) & 
                               (abs(electrons.eta)<2.1) & 
                               eleEtaGap &      
                               (electrons.cutBased>=4) &
                               elePassD0 & 
                               elePassDZ
                              )

        electronSelectLoose = ((electrons.pt>15) & 
                               (abs(electrons.eta)<2.4) & 
                               eleEtaGap &      
                               (electrons.cutBased>=1) &
                               elePassD0 & 
                               elePassDZ & 
                               np.invert(electronSelectTight)
                              )
        
        tightMuon = muons[muonSelectTight]
        looseMuon = muons[muonSelectLoose]
        
        tightElectron = electrons[electronSelectTight]
        looseElectron = electrons[electronSelectLoose]

        

                
        oneMuon = (tightMuon.counts == 1)
        muVeto = (tightMuon.counts == 0)
        oneEle = (tightElectron.counts == 1)
        eleVeto = (tightElectron.counts == 0)
        looseMuonSel = (looseMuon.counts == 0)
        looseElectronSel = (looseElectron.counts == 0)

        
        #### Calculate deltaR between photon and nearest muon
        ####### make combination pairs
        phoMu = photons['p4'].cross(tightMuon['p4'],nested=True)
        ####### check delta R of each combination, if min is >0.1 it is okay, or if there are no tight muons it passes
        dRphomu = (phoMu.i0.delta_r(phoMu.i1)>0.4).all() | (tightMuon.counts==0)
        phoEle = photons['p4'].cross(tightElectron['p4'],nested=True)
        dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min()>0.4) | (tightElectron.counts==0)
        
        #photon selection (no ID requirement used here)
        photonSelect = ((photons.pt>20) & 
                        (abs(photons.eta) < 1.4442) &
                        (photons.isEE | photons.isEB) &
                        (photons.passEleVeto) & 
                        np.invert(photons.pixelSeed) & 
                        dRphomu & dRphoele
                       )
        
        
        #split out the ID requirement, enabling Iso and SIEIE to be inverted for control regions
        photonID = photons.photonId >= 2

        #parse VID cuts, define loose photons (not used yet)
        photon_MinPtCut = (photons.vidCuts>>0 & 3)>=2 
        photon_PhoSCEtaMultiRangeCut = (photons.vidCuts>>2 & 3)>=2 
        photon_PhoSingleTowerHadOverEmCut = (photons.vidCuts>>4 & 3)>=2  
        photon_PhoFull5x5SigmaIEtaIEtaCut = (photons.vidCuts>>6 & 3)>=2  
        photon_PhoAnyPFIsoWithEACut = (photons.vidCuts>>8 & 3)>=2  
        photon_PhoAnyPFIsoWithEAAndQuadScalingCut = (photons.vidCuts>>10 & 3)>=2  
        photon_PhoAnyPFIsoWithEACut = (photons.vidCuts>>12 & 3)>=2  
        
        loosePhotonID = photon_PhoSingleTowerHadOverEmCut & photon_PhoAnyPFIsoWithEACut & photon_PhoAnyPFIsoWithEAAndQuadScalingCut & photon_PhoAnyPFIsoWithEACut

        
        tightPhotons = photons[photonSelect & photonID]

        
        ##medium jet ID cut
        jetIDbit = 1
        if year>2016: jetIDbit=2

        ##check dR jet,lepton & jet,photon
        jetMu = jets['p4'].cross(tightMuon['p4'],nested=True)
        dRjetmu = ((jetMu.i0.delta_r(jetMu.i1)).min()>0.4) | (tightMuon.counts==0)

        jetEle = jets['p4'].cross(tightElectron['p4'],nested=True)
        dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min()>0.4) | (tightElectron.counts==0)

        jetPho = jets['p4'].cross(tightPhotons['p4'],nested=True)
        dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min()>0.1) | (tightPhotons.counts==0)
        
        jetSelect = ((jets.pt > 30) &
                     (abs(jets.eta) < 2.4) &
                     ((jets.jetId >> jetIDbit & 1)==1) &
                     dRjetmu & dRjetele & dRjetpho                    
                    )

        tightJets = jets[jetSelect]
        
        bTagWP = 0.6321
        if year == 2017:
            bTagWP = 0.4941
        if year == 2018:
            bTagWP = 0.4184

        btagged = tightJets.btag>bTagWP

        bJets = tightJets[btagged]

        ## Define M3, mass of 3-jet pair with highest pT
        triJet = tightJets['p4'].choose(3)

        triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
        triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass
        M3 = triJetMass[triJetPt.argmax()]


        leadingPhoton = tightPhotons[:,:1]

        #calculate dR lepton,photon
        gammaMu = leadingPhoton['p4'].cross(tightMuon['p4'],nested=True)
        dRgammaMu = (gammaMu.i0.delta_r(gammaMu.i1)).min()
        gammaEle = leadingPhoton['p4'].cross(tightElectron['p4'],nested=True)
        dRgammaEle = (gammaEle.i0.delta_r(gammaEle.i1)).min()

        dRgammaLep = np.minimum(dRgammaMu,dRgammaEle)        

        
        #### Photon categories, using genIdx branch
        # reco photons really generated as electrons
        isMisIDele = (abs(genPart[leadingPhoton.genIdx].pdgid)==13).any()

        matchedPho = (abs(genPart[leadingPhoton.genIdx].pdgid)==22).any()

        # look through parentage to find if any hadrons in genPhoton parent history
        genParent = genPart[leadingPhoton.genIdx].motherIdx
        isHadPho = (genPart[leadingPhoton.genIdx].pdgid>25).any()
        while (genParent>-1).any().sum()>0:
            genParent = genParent[genParent>-1]
            isHadPho = isHadPho | (genPart[genParent].pdgid>25).any()
            genParent = genPart[genParent].motherIdx

        
        isHadFake = (leadingPhoton.genIdx==-1).any()
        isHadFake = isHadFake & np.invert(isHadPho)

        isGenPho = matchedPho & np.invert(isHadPho)

        #define integer definition for the photon category axis
        phoCategory = 1*isGenPho + 2*isMisIDele + 3*isHadPho + 4*isHadFake

        
        #define selection levels (for cutflow synchronization)
        mu_trigger = muonTrigger        
        mu_filter = mu_trigger & filters
        mu_oneMu = oneMuon & mu_filter
        mu_noEle = eleVeto & mu_oneMu
        mu_noLoose = looseMuonSel & looseElectronSel & mu_noEle
        mu_twoJet = (tightJets.counts >= 2) & mu_noLoose
        mu_threeJet = (tightJets.counts >= 3) & mu_noLoose
        mu_jetSel = (tightJets.counts >= 4) & mu_noLoose
        mu_bjetSel = (bJets.counts >= 2) & mu_jetSel
        mu_presel = mu_bjetSel
        mu_phosel = (tightPhotons.counts >= 1) & mu_bjetSel

        ele_trigger = eleTrigger
        ele_filter = ele_trigger & filters
        ele_oneEle = oneEle & ele_filter
        ele_noMu = muVeto & ele_oneEle
        ele_noLoose = looseMuonSel & looseElectronSel & ele_noMu
        ele_twoJet = (tightJets.counts >= 2) & ele_noLoose
        ele_threeJet = (tightJets.counts >= 3) & ele_noLoose
        ele_jetSel = (tightJets.counts >= 4) & ele_noLoose
        ele_bjetSel = (bJets.counts >= 2) & ele_jetSel
        ele_presel = ele_bjetSel
        ele_phosel = (tightPhotons.counts >= 1) & ele_bjetSel

        
        
        lep_jetSel = mu_jetSel | ele_jetSel
        lep_bjetSel = mu_bjetSel | ele_bjetSel
        lep_phosel = mu_phosel | ele_phosel
        
        lepFlavor = -1*ele_noLoose + 1*mu_noLoose
        

        #get leading generated photon, for 
        genPhotonSel =  ((genPart.pdgid==22) &
                         (genPart.statusFlags&1==1) &
                         (genPart.statusFlags>>13&1==1))
        genPhotons = genPart[genPhotonSel]
        
        #find highest Pt, otherwise they are sorted by gen history
        leadingGenPhoton = genPhotons[genPhotons.pt==genPhotons.pt.max()]
    
        ext = extractor()
        ext.add_weight_sets(["btag * ScaleFactors/Btag/%s"%btagSF])
        ext.finalize()
        evaluator = ext.make_evaluator()
        
        #btag key name
        #name / working Point / type / systematic / jetType
        #  ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg 
        bJetSF = evaluator['btagDeepCSV_1_comb_central_0'](tightJets.eta, tightJets.pt, tightJets.btag)
        bJetSF_c = evaluator['btagDeepCSV_1_comb_central_1'](tightJets.eta, tightJets.pt, tightJets.btag)
        bJetSF_udcsg = evaluator['btagDeepCSV_1_incl_central_2'](tightJets.eta, tightJets.pt, tightJets.btag)

        bJetSF.content[(tightJets.hadFlav==4).content] = bJetSF_c[tightJets.hadFlav==4].content
        bJetSF.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg[tightJets.hadFlav==0].content
        

        ## mc efficiency lookup, data efficiency is eff* scale factor
        btagEfficiencies = taggingEffLookup(datasetFull,tightJets.hadFlav,tightJets.pt,tightJets.eta)
        btagEfficienciesData = btagEfficiencies*bJetSF

        ##probability is the product of all efficiencies of tagged jets, times product of 1-eff for all untagged jets
        ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
        pMC   = btagEfficiencies[btagged].prod()     * (1.-btagEfficiencies[np.invert(btagged)]).prod() 
        pData = btagEfficienciesData[btagged].prod() * (1.-btagEfficienciesData[np.invert(btagged)]).prod()
        btagWeight = pData/pMC
        

        eleSF, eleSFup, eleSFdo = getEleSF(tightElectron.pt, tightElectron.eta, year)
        muSF, muSFup, muSFdo = getMuSF(tightMuon.pt, tightMuon.eta, year)
        
        
        evtWeight = np.ones(df.size)
        if not isRealData:
            evtWeight *= df['genWeight']
            evtWeight *= xsec * lumis[year] / mcSumWeight
            # FIXME: some issue with btag weight evaluation giving NaN for jets in pt 200-400
            #evtWeight *= btagWeight
            evtWeight *= eleSF
            evtWeight *= muSF
        
        output['photon_pt'].fill(dataset=dataset,
                                 pt=tightPhotons.p4.pt[:,:1][lep_phosel].flatten(),
                                 category=phoCategory[lep_phosel].flatten(),
                                 year=yearStr,
                                 lepFlavor=lepFlavor[lep_phosel],
                                 weight=evtWeight[lep_phosel].flatten())
        output['photon_eta'].fill(dataset=dataset,
                                  eta=tightPhotons.eta[:,:1][lep_phosel].flatten(),
                                  category=phoCategory[lep_phosel].flatten(),
                                  year=yearStr,
                                  lepFlavor=lepFlavor[lep_phosel],
                                  weight=evtWeight[lep_phosel].flatten())
        if 'TTGamma' in dataset:
            output['photon_RecoVsGenPt'].fill(dataset=dataset,
                                              pt=tightPhotons.pt[:,:1][lep_phosel].flatten(),
                                              genpt=leadingGenPhoton.pt[:,:1][lep_phosel].flatten(),
                                              category=phoCategory[lep_phosel].flatten(),
                                              year=yearStr,
                                              lepFlavor=lepFlavor[lep_phosel],
                                              weight=evtWeight[lep_phosel].flatten())
            output['photon_RecoVsGenEta'].fill(dataset=dataset,
                                               eta=tightPhotons.eta[:,:1][lep_phosel].flatten(),
                                               geneta=leadingGenPhoton.eta[:,:1][lep_phosel].flatten(),
                                               category=phoCategory[lep_phosel].flatten(),
                                               year=yearStr,
                                               lepFlavor=lepFlavor[lep_phosel],
                                               weight=evtWeight[lep_phosel].flatten())
        output['photon_sieie'].fill(dataset=dataset,
                                    sieie=tightPhotons.sieie[:,:1][lep_phosel].flatten(),
                                    category=phoCategory[lep_phosel].flatten(),
                                    year=yearStr,
                                    lepFlavor=lepFlavor[lep_phosel],
                                    weight=evtWeight[lep_phosel].flatten())

        output['photon_chIso'].fill(dataset=dataset,
                                    chIso=tightPhotons.chIso[:,:1][lep_phosel].flatten(),
                                    category=phoCategory[lep_phosel].flatten(),
                                    year=yearStr,
                                    lepFlavor=lepFlavor[lep_phosel],
                                    weight=evtWeight[lep_phosel].flatten())

        output['dR_lep_gamma'].fill(dataset=dataset,
                                    dR=dRgammaLep[lep_phosel].flatten(),
                                    category=phoCategory[lep_phosel].flatten(),
                                    year=yearStr,
                                    lepFlavor=lepFlavor[lep_phosel],
                                    weight=evtWeight[lep_phosel].flatten())

        output['pt_jet'].fill(dataset=dataset,
                              pt=tightJets.p4.pt[:,:1][lep_jetSel].flatten(),
                              year=yearStr,
                              nPho=tightPhotons[lep_jetSel].counts.flatten(),
                              nBjet=bJets[lep_jetSel].counts.flatten(),
                              lepFlavor=lepFlavor[lep_jetSel],
                              weight=evtWeight[lep_jetSel].flatten())
        output['jet_mult'].fill(dataset=dataset,
                                N=tightJets[lep_jetSel].counts.flatten(),
                                year=yearStr,
                                nPho=tightPhotons[lep_jetSel].counts.flatten(),
                                nBjet=bJets[lep_jetSel].counts.flatten(),
                                lepFlavor=lepFlavor[lep_jetSel],
                                weight=evtWeight[lep_jetSel].flatten())
        output['M3'].fill(dataset=dataset,
                          M3=M3[lep_jetSel].flatten(),
                          year=yearStr,
                          nPho=tightPhotons[lep_jetSel].counts.flatten(),
                          nBjet=bJets[lep_jetSel].counts.flatten(),
                          lepFlavor=lepFlavor[lep_jetSel],
#                           category=phoCategory[mu_phosel].flatten(),
                          weight=evtWeight[lep_jetSel].flatten())


        
        return output

    def postprocess(self, accumulator):
        return accumulator


In [5]:
class MCWeightSum(processor.ProcessorABC):
    def __init__(self):
        self._genw = processor.defaultdict_accumulator(float)
    
    @property
    def accumulator(self):
        return self._genw
    
    def process(self, df):
        out = self.accumulator.identity()
        out[df['dataset']] += df['genEventSumw'].sum()
        return out
        
    def postprocess(self, acc):
        return acc


In [6]:
tstart = time.time()

fileset = {
    'TTGamma_Dilept_2016': [
        'data/TTGamma_Dilept_TuneCP5_PSweights_13TeV-madgraph-pythia8.0.root',
        'data/TTGamma_Dilept_TuneCP5_PSweights_13TeV-madgraph-pythia8.1.root',
    ],
    'TTGamma_SingleLept_2016': [
        'data/TTGamma_SingleLept_TuneCP5_PSweights_13TeV-madgraph-pythia8.0.root',
        'data/TTGamma_SingleLept_TuneCP5_PSweights_13TeV-madgraph-pythia8.1.root',
    ],
    # 'TTGamma_Hadronic': [],
    'TTbar_2016': [
        'data/TTTo2L2Nu_TuneCP5_PSweights_13TeV-powheg-pythia8.0.root',
        'data/TTTo2L2Nu_TuneCP5_PSweights_13TeV-powheg-pythia8.0.root',
        'data/TTToSemiLeptonic_TuneCP5_PSweights_13TeV-powheg-pythia8.0.root',
        'data/TTToSemiLeptonic_TuneCP5_PSweights_13TeV-powheg-pythia8.1.root'
    ],
    'ZGamma_2016': [
        'data/ZGToLLG_01J_5f_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8.0.root',
        'data/ZGToLLG_01J_5f_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8.1.root',
    ],
    'WGamma_2016': [
        'data/WGToLNuG_01J_5f_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8.0.root',
        'data/WGToLNuG_01J_5f_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8.1.root',
    ]
}

filemeta = {}

mcSumWeights = processor.run_uproot_job(fileset,
                                  treename='Runs',
                                  processor_instance=MCWeightSum(),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 4, 'flatten': True},
                                  metadata_cache=filemeta,
                                 )
print(mcSumWeights)

output = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=TTGammaProcessor(mcSumWeights=mcSumWeights),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 4, 'flatten': True},
                                  metadata_cache=filemeta,
                                 )

elapsed = time.time() - tstart
print(elapsed)

HBox(children=(IntProgress(value=0, description='Preprocessing', max=11, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Processing', max=12, style=ProgressStyle(description_width='i…


defaultdict_accumulator(<class 'float'>, {'WGamma_2016': 431218385.23342997, 'TTbar_2016': 408626493.5750917, 'ZGamma_2016': 109249699.21376002, 'TTGamma_Dilept_2016': 58081.83072544624, 'TTGamma_SingleLept_2016': 1479820.2680320474})


HBox(children=(IntProgress(value=0, description='Preprocessing', max=11, style=ProgressStyle(description_width…




HBox(children=(IntProgress(value=0, description='Processing', max=32, style=ProgressStyle(description_width='i…




147.01381301879883


In [7]:
util.save(output, 'ttgamma.coffea')

In [8]:
from pyinstrument import Profiler

profiler = Profiler()

profiler.start()
_, metrics = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=TTGammaProcessor(mcSumWeights=mcSumWeights),
                                  executor=processor.iterative_executor,
                                  executor_args={'flatten': True, 'savemetrics': True},
                                  metadata_cache=filemeta,
                                  maxchunks=1,
                                 )
profiler.stop()

HBox(children=(IntProgress(value=0, description='Processing', max=5, style=ProgressStyle(description_width='in…






In [11]:
metrics

{'columns': {'Electron_charge',
  'Electron_cutBased',
  'Electron_dxy',
  'Electron_dz',
  'Electron_eta',
  'Electron_mass',
  'Electron_phi',
  'Electron_pt',
  'Flag_BadPFMuonFilter',
  'Flag_EcalDeadCellTriggerPrimitiveFilter',
  'Flag_HBHENoiseFilter',
  'Flag_HBHENoiseIsoFilter',
  'Flag_globalSuperTightHalo2016Filter',
  'Flag_goodVertices',
  'GenPart_eta',
  'GenPart_genPartIdxMother',
  'GenPart_mass',
  'GenPart_pdgId',
  'GenPart_phi',
  'GenPart_pt',
  'GenPart_status',
  'GenPart_statusFlags',
  'HLT_Ele27_WPTight_Gsf',
  'HLT_IsoMu24',
  'HLT_IsoTkMu24',
  'Jet_btagDeepB',
  'Jet_eta',
  'Jet_genJetIdx',
  'Jet_hadronFlavour',
  'Jet_jetId',
  'Jet_mass',
  'Jet_phi',
  'Jet_pt',
  'Muon_charge',
  'Muon_eta',
  'Muon_isGlobal',
  'Muon_isPFcand',
  'Muon_isTracker',
  'Muon_mass',
  'Muon_pfRelIso04_all',
  'Muon_phi',
  'Muon_pt',
  'Muon_tightId',
  'Photon_cutBased',
  'Photon_electronVeto',
  'Photon_eta',
  'Photon_genPartIdx',
  'Photon_isScEtaEB',
  'Photon_isSc

In [10]:
with open('profile.html', 'w') as fout:
    fout.write(profiler.output_html())