In [1]:
import hist
import pandas as pd
import numpy as np
import json
import awkward as ak
import correctionlib
from typing import Type
import importlib.resources
from coffea.analysis_tools import PackedSelection
from coffea.analysis_tools import Weights
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from utils import pog_years, get_pog_json

In [2]:
fname = "root://xcache//store/mc/RunIISummer20UL17NanoAODv2/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/NANOAODSIM/106X_mc2017_realistic_v8-v1/120000/2A2F4EC9-F9BB-DF43-B08D-525B5389937E.root"
events = NanoEventsFactory.from_root(fname, entry_stop=100000, schemaclass=NanoAODSchema).events()

weights = Weights(len(events), storeIndividual=True)

In [3]:
events.MET.fields

['MetUnclustEnUpDeltaX',
 'MetUnclustEnUpDeltaY',
 'covXX',
 'covXY',
 'covYY',
 'phi',
 'pt',
 'significance',
 'sumEt',
 'sumPtUnclustered',
 'fiducialGenPhi',
 'fiducialGenPt']

In [5]:
good = ((events.Tau.pt >= 20)
                & (np.abs(events.Tau.eta) <= 2.3)
                # github.com/bonanomi/hh2bbww/blob/388efda4e9a6a207d4e983c7f7528acb3a4c374f/hbw/selection/default.py#L295
                & (events.Tau.idDeepTau2017v2p1VSjet >= 6)  # VVVLoose,VVLoose,VLoose,Loose,Medium,Tight,VTight,VVTight
                & (events.Tau.idDeepTau2017v2p1VSe >= 5)    # VVVLoose,VVLoose,VLoose,Loose,Medium,Tight,VTight,VVTight
                & (events.Tau.idDeepTau2017v2p1VSmu >= 4)   # VLoose,Loose,Medium,Tight
                & (np.abs(events.Tau.dz) < 0.2)
                & (events.Tau.idDecayModeNewDMs)    
                & (
                    (events.Tau.decayMode == 0)             # 0 (tau->pi)
                    | (events.Tau.decayMode == 1)           # 1 (tau->rho->pi+pi0)
                    | (events.Tau.decayMode == 1)           # 2 (tau->a1->pi+2pi0)
                    | (events.Tau.decayMode == 10)          # 10 (tau->a1->3pi)
                    | (events.Tau.decayMode == 11)          # 11 (tau->3pi+pi0)
                )
       )

events.Tau[good]

<TauArray [[], [Tau], [], [], ... [], [], [], []] type='100000 * var * tau'>

In [6]:
"""
https://github.com/schaefes/hh2bbtautau/blob/da6d47a7ddb2b1e7ffda06b8a96c6ddead2824b8/hbt/production/tau.py#L108

"""

class TauCorrector:
    def __init__(
        self,
        taus: ak.Array,
        weights: Type[Weights],
        year: str = "2017",
        year_mod: str = "",
        tag: str = "tau",
        variation: str = "nom"
    ) -> None:

        self.variation = variation
        
        # tau array
        self.taus = taus

        # tau transverse momentum and pseudorapidity
        self.tau_pt = np.array(ak.fill_none(self.taus.pt, 0.0))
        self.tau_eta = np.array(ak.fill_none(self.taus.eta, 0.0))
        self.tau_dm = ak.to_numpy(self.taus.decayMode, allow_missing=True)
        self.tau_genPart = ak.to_numpy(self.taus.genPartFlav, allow_missing=True)

        # weights container
        self.weights = weights

        # define correction set_id
        self.cset = correctionlib.CorrectionSet.from_file(
            get_pog_json(json_name="tau", year=year + year_mod)
        )
                
        # define correction set_reco
        if (year == "2017"):
            self.recoset = correctionlib.CorrectionSet.from_file(
                "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/TAU/2017_ReReco/tau.json.gz"
        )            
        elif (year == "2018"):
            self.recoset = correctionlib.CorrectionSet.from_file(
                "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/TAU/2018_ReReco/tau.json.gz"
        )  
        
        
        self.year = year
        self.year_mod = year_mod
        self.pog_year = pog_years[year + year_mod]

        self.tag = tag


    def add_id_weight_DeepTau2017v2p1VSe(self, working_point: str = "Tight", systematic: str = "nom"):
        # tau gen particle. We only need to consider values: 1, 3. 0 is unmached 
        e_mask = (
            (self.tau_genPart == 1) | 
            (self.tau_genPart == 3)
        )
        
        # tau pseudorapidity range: [0.0, 2.3)
        tau_eta = np.clip(self.tau_eta.copy(), 0.0, 2.3)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, e_mask),0)
        
        
        # get scale factors
        values = {}    
        
        
        """
        Sf is called with:
        
        evaluate(eta (real),  genmatch (int) , wp (string), syst (string))
        
        """       
        values["nominal"] = self.cset["DeepTau2017v2p1VSe"].evaluate(tau_eta, tau_gen, "Medium", "nom")
        
        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] = self.cset["DeepTau2017v2p1VSe"].evaluate(tau_eta, tau_gen, "Medium", "up")
            values["down"] = self.cset["DeepTau2017v2p1VSe"].evaluate(tau_eta, tau_gen, "Medium", "down")
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )
        
        
    def add_id_weight_DeepTau2017v2p1VSmu(self, working_point: str = "Tight", systematic: str = "nom"):
        # tau gen particle. We only need to consider values: 2, 4. 0 is unmached 
        mu_mask = (
            (self.tau_genPart == 2) | 
            (self.tau_genPart == 4)
        )

        # tau pseudorapidity range: [0.0, 2.3)
        tau_eta = np.clip(self.tau_eta.copy(), 0.0, 2.3)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, mu_mask),0)
            
        
        # get scale factors
        values = {}    
        
        
        """
        Sf is called with:
        
        evaluate(eta (real),  genmatch (int) , wp (string), syst (string))
        
        """       
        values["nominal"] = self.cset["DeepTau2017v2p1VSmu"].evaluate(tau_eta, tau_gen, working_point, "nom")

        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] = self.cset["DeepTau2017v2p1VSmu"].evaluate(tau_eta, tau_gen, working_point, "up")
            values["down"] = self.cset["DeepTau2017v2p1VSmu"].evaluate(tau_eta, tau_gen, working_point, "down")
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )
            
        
    def add_id_weight_DeepTau2017v2p1VSjet(self, 
                                           working_point: str = "Tight", 
                                           working_point_VSe: str = "Tight", 
                                           systematic: str = "nom", 
                                           flag: str = "pt"
                                          ):
        
        # tau gen particle. We only need to consider values: 5 in genmatch. 0 is unmached
        # For dm, the possible values will be 0, 1, 2, 10, 11   
        tau_mask_gm = (self.tau_genPart == 5)
        tau_mask_dm = (
            (self.tau_dm == 0) |
            (self.tau_dm == 1) |
            (self.tau_dm == 2) |
            (self.tau_dm == 10) |
            (self.tau_dm == 11)
        )

        # tau pt
        tau_pt = self.tau_pt
        

        # tau decay mode
        tau_dm = ak.fill_none(ak.mask(self.tau_dm, tau_mask_dm),0)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, tau_mask_gm),0)

       
        # get scale factors
        values = {}
        
        
        """
        https://github.com/LEAF-HQ/LEAF/blob/d22cc55594a4b16d061c25dbf7ecdec04eedbc34/Analyzer/src/TauScaleFactorApplicatorJson.cc#L28
        
        Sf is called with:
        
        evaluate(pt (real),  dm (int), genmatch (int), wp (string), wp_VSe (string), syst (string), flag (string))
        
         - dm (decay mode): 0 (tau->pi); 1 (tau->rho->pi+pi0); 2 (tau->a1->pi+2pi0); 10 (tau->a1->3pi); 11 (tau->3pi+pi0)
         - getmatch: 0 or 6 = unmatched or jet, 1 or 3 = electron, 2 or 4 = muon, 5 = real tau
         - flag: We have worked in 'pt' = pT-dependent
         
        """
  
        values["nominal"] = self.cset["DeepTau2017v2p1VSjet"].evaluate(tau_pt, tau_dm, tau_gen, working_point, working_point_VSe, "nom", flag)
        
        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] = self.cset["DeepTau2017v2p1VSjet"].evaluate(tau_pt, tau_dm, tau_gen, working_point, working_point_VSe, "up", flag)
            values["down"] = self.cset["DeepTau2017v2p1VSjet"].evaluate(tau_pt, tau_dm, tau_gen, working_point, working_point_VSe, "down", flag)
        
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )


    def add_id_weight_tau_energy_scale(self, id: str = "DeepTau2017v2p1" , systematic: str = "nom"):
        mask_gm = (
            (self.tau_genPart == 1) | 
            (self.tau_genPart == 2) | 
            (self.tau_genPart == 5) | 
            (self.tau_genPart == 6)
        )
        mask_dm = (
            (self.tau_dm == 0) |
            (self.tau_dm == 1) |
            (self.tau_dm == 2) |
            (self.tau_dm == 10) |
            (self.tau_dm == 11)
        )
        
        # tau pt
        tau_pt = self.tau_pt
        
        # tau pseudorapidity range: [0.0, 2.3)
        tau_eta = np.clip(self.tau_eta.copy(), 0.0, 2.5)
        
        # tau decay mode
        tau_dm = ak.fill_none(ak.mask(self.tau_dm, mask_dm),0)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, mask_gm),0)
           
        
        # get scale factors
        values = {}
        
                
        """
        Sf is called with:
        
        evaluate(pt (real); eta (real);  dm (int); genmatch (int); id (string); syst (string))
        
        """
        
        values["nominal"] = self.cset["tau_energy_scale"].evaluate(tau_pt, tau_eta, tau_dm, tau_gen, id, "nom")

        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] = self.cset["tau_energy_scale"].evaluate(tau_pt, tau_eta, tau_dm, tau_gen, id, "up")
            values["down"] = self.cset["tau_energy_scale"].evaluate(tau_pt, tau_eta, tau_dm, tau_gen, id, "down")
        
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )        
        
        
    def add_reco_weight_antiEleMVA6(self, working_point: str = "Tight" , systematic: str = "nom"):
        # tau gen particle. We only need to consider values: 1, 3. 0 is unmached 
        mu_mask = (
            (self.tau_genPart == 2) | 
            (self.tau_genPart == 4)
        )

        
        # tau pseudorapidity range: [0.0, 2.3)
        tau_eta = np.clip(self.tau_eta.copy(), 0.0, 2.3)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, mu_mask),0)
        
        
        # get scale factors
        values = {}

        """
        Sf is called with:
        
        evaluate(eta (real);  genmatch (int); wp (string); syst (string))
        
        """
        values["nominal"] = self.recoset["antiMu3"].evaluate(tau_eta, tau_gen, working_point, "nom")
        
        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] = self.recoset["antiMu3"].evaluate(tau_eta, tau_gen, working_point, "up")
            values["down"] = self.recoset["antiMu3"].evaluate(tau_eta, tau_gen, working_point, "down")
        
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )    
        
        
        
    def add_reco_weight_antiMu3(self, working_point: str = "Tight" , systematic: str = "nom"):
        # tau gen particle. We only need to consider values: 1, 3. 0 is unmached 
        e_mask = (
            (self.tau_genPart == 1) | 
            (self.tau_genPart == 3)
        )

        
        # tau pseudorapidity range: [0.0, 2.3)
        tau_eta = np.clip(self.tau_eta.copy(), 0.0, 2.3)
        
        
        # genmatch
        tau_gen = ak.fill_none(ak.mask(self.tau_genPart, e_mask),0)
        
        
        # get scale factors
        values = {}

        """
        Sf is called with:
        
        evaluate(eta (real);  genmatch (int); wp (string); syst (string))
        
        """
        values["nominal"] = self.recoset["antiEleMVA6"].evaluate(tau_eta, tau_gen, working_point, "nom")

        # -------------------
        # Systematics        
        # -------------------    
        if self.variation == "nom":
            values["up"] =  self.recoset["antiEleMVA6"].evaluate(tau_eta, tau_gen, working_point, "up")
            values["down"] =  self.recoset["antiEleMVA6"].evaluate(tau_eta, tau_gen, working_point, "down")        
            
            # add scale factors to weights container
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
                weightUp=values["up"],
                weightDown=values["down"],
        )
        else:
            self.weights.add(
                name=f"{self.tag}_id",
                weight=values["nominal"],
        )   

In [7]:
weights_container = Weights(len(events), storeIndividual=True)

tau_corrector = TauCorrector(taus = ak.firsts(events.Tau), weights=weights_container, 
                            year="2017", year_mod="", 
                            tag="tau", 
                            variation="nom")

tau_corrector.add_id_weight_DeepTau2017v2p1VSe("Tight", "nom")
tau_corrector.add_id_weight_DeepTau2017v2p1VSmu("Tight", "nom")
tau_corrector.add_id_weight_DeepTau2017v2p1VSjet("Tight", "Tight", "nom", "pt")
tau_corrector.add_id_weight_tau_energy_scale("DeepTau2017v2p1", "nom")
tau_corrector.add_reco_weight_antiEleMVA6("Tight","nom")
tau_corrector.add_reco_weight_antiMu3("Tight","nom")

In [8]:
weights_container.weight()

array([1.015     , 0.85970408, 2.19784158, ..., 1.        , 1.        ,
       1.        ])

In [9]:
weights_keys = list(weights_container._weights.keys())
print(weights_keys)

['tau_id']


In [13]:
weight_info = weights_container.get("tau_id")
weight_up_variation = weight_info.weightUp

AttributeError: 'Weights' object has no attribute 'get'