In [87]:

import re
from typing import Any, Callable, List, Tuple, Union

import awkward
import numpy

import coffea

behavior = {}
@awkward.mixin_class(behavior)
class Systematic:
    """A base mixin class to describe and build variations on a feature of an nanoevents object."""

    _systematic_kinds = set()

    @classmethod
    def add_kind(cls, kind: str):
        """
        Register a type of systematic variation, it must fulfill the base class interface.
        """
        cls._systematic_kinds.add(kind)

    def _ensure_systematics(self):
        """
        Make sure that the parent object always has a field called '__systematics__'.
        """
        if "__systematics__" not in awkward.fields(self):
            self["__systematics__"] = {}

    @property
    def systematics(self):
        """
        Return the list of all systematics attached to this object.
        """
        regex = re.compile(r"\_{2}.*\_{2}")
        self._ensure_systematics()
        fields = [
            f for f in awkward.fields(self["__systematics__"]) if not regex.match(f)
        ]
        return self["__systematics__"][fields]

    def add_systematic(
        self,
        name: str,
    ):
        """
        name: str, name of the systematic variation / uncertainty source
        kind: str, the name of the kind of systematic variation
        what: Union[str, List[str], Tuple[str]], name what gets varied, this could be a list or tuple of column names
        varying_function: Union[function, bound method], a function that describes how 'what' is varied, it must close over all non-event-data arguments.
        """
        self._ensure_systematics()

        if name in awkward.fields(self["__systematics__"]):
            raise ValueError(f"{name} already exists as a systematic for this object!")


        # rendered_type = flat.layout.parameters["__record__"]
        # as_syst_type = awkward.with_parameter(flat, "__record__", kind)
        # as_syst_type._build_variations(name, what, varying_function)
        # variations = as_syst_type.describe_variations()
        self['__systematics__']['name'] = {}
        

behavior[("__typestr__", "Systematic")] = "Systematic"

Systematic.add_kind('ciao')

In [99]:
behavior = {}
@awkward.mixin_class(behavior)
class Test:
    def _ensure_systematics(self):
        """
        Make sure that the parent object always has a field called '__systematics__'.
        """
        if "__systematics__" not in awkward.fields(self):
            self["__systematics__"] = {}
    def add_systematic(self, name):
        self._ensure_systematics()
        print(type(self['__systematics__']))
        self['__systematics__'][name] = {}


In [93]:
Systematic._systematic_kinds

{'ciao'}

In [91]:
behavior

{'Systematic': __main__.SystematicRecord,
 ('*', 'Systematic'): __main__.SystematicArray,
 ('__typestr__', 'Systematic'): 'Systematic'}

In [88]:
import awkward as ak

In [100]:
_events = ak.with_name(events, name='Test', behavior=behavior)

In [101]:
_events.add_systematic('ciao')

<class 'awkward.highlevel.Array'>


In [98]:
_events.__systematics__

In [86]:
_events.systematics

AttributeError: no field named 'systematics'

In [79]:


class _ClassMethodFn:
    def __init__(self, attr: str, **kwargs: Any) -> None:
        self.attr = attr

    def __call__(self, coll: awkward.Array, *args: Any, **kwargs: Any) -> awkward.Array:
        return getattr(coll, self.attr)(*args, **kwargs)


@awkward.mixin_class(behavior)
class Systematic:
    """A base mixin class to describe and build variations on a feature of an nanoevents object."""

    _systematic_kinds = set()

    @classmethod
    def add_kind(cls, kind: str):
        """
        Register a type of systematic variation, it must fulfill the base class interface.
        """
        cls._systematic_kinds.add(kind)

    def _ensure_systematics(self):
        """
        Make sure that the parent object always has a field called '__systematics__'.
        """
        if "__systematics__" not in awkward.fields(self):
            self["__systematics__"] = {}

    @property
    def systematics(self):
        """
        Return the list of all systematics attached to this object.
        """
        regex = re.compile(r"\_{2}.*\_{2}")
        self._ensure_systematics()
        fields = [
            f for f in awkward.fields(self["__systematics__"]) if not regex.match(f)
        ]
        return self["__systematics__"][fields]

    @abstractmethod
    def _build_variations(
        self,
        name: str,
        what: Union[str, List[str], Tuple[str]],
        varying_function: Callable,
    ):
        """
        name: str, name of the systematic variation / uncertainty source
        what: Union[str, List[str], Tuple[str]], name what gets varied,
              this could be a list or tuple of column names
        varying_function: Union[function, bound method, partial], a function that describes how 'what' is varied
        define how to manipulate the output of varying_function to produce all systematic variations. Varying function
        must close over all non-event-data arguments.
        """
        pass

    @abstractmethod
    def explodes_how(self):
        """
        This describes how a systematic uncertainty needs to be evaluated in the context of other systematic uncertainties.
        i.e. Do you iterate over this keeping all others fixed or do you need to have correlations with other (subsets of) systematics.
        """
        # this function contains decades of thinking about iterate over systematics variations
        # your opinions about systematics go here. :D
        pass

    @abstractmethod
    def describe_variations(self):
        """returns a list of variation names"""
        pass

    def add_systematic(
        self,
        name: str,
        kind: str,
        what: Union[str, List[str], Tuple[str]],
        varying_function: Callable,
    ):
        """
        name: str, name of the systematic variation / uncertainty source
        kind: str, the name of the kind of systematic variation
        what: Union[str, List[str], Tuple[str]], name what gets varied, this could be a list or tuple of column names
        varying_function: Union[function, bound method], a function that describes how 'what' is varied, it must close over all non-event-data arguments.
        """
        self._ensure_systematics()

        if name in awkward.fields(self["__systematics__"]):
            raise ValueError(f"{name} already exists as a systematic for this object!")

        if kind not in self._systematic_kinds:
            raise ValueError(
                f"{kind} is not an available systematics type, please add it and try again!"
            )

        wrap = partial(
            awkward_rewrap, like_what=self["__systematics__"], gfunc=rewrap_recordarray
        )
        flat = (
            self
            if isinstance(self, coffea.nanoevents.methods.base.NanoEvents)
            else awkward.flatten(self)
        )

        if what == "weight" and "__ones__" not in awkward.fields(
            flat["__systematics__"]
        ):
            flat["__systematics__", "__ones__"] = numpy.ones(
                len(flat), dtype=numpy.float32
            )

        rendered_type = flat.layout.parameters["__record__"]
        as_syst_type = awkward.with_parameter(flat, "__record__", kind)
        as_syst_type._build_variations(name, what, varying_function)
        variations = as_syst_type.describe_variations()

        flat["__systematics__", name] = awkward.zip(
            {
                v: getattr(as_syst_type, v)(name, what, rendered_type)
                for v in variations
            },
            depth_limit=1,
            with_name=f"{name}Systematics",
        )

        self["__systematics__"] = wrap(flat["__systematics__"])
        self.behavior[("__typestr__", f"{name}Systematics")] = f"{kind}"


behavior[("__typestr__", "Systematic")] = "Systematic"

NameError: name 'abstractmethod' is not defined

## Read events

In [2]:
uproot.__version__

'5.3.1'

In [3]:
!ls /Users/giorgiopizzati/testcoffea/rootFiles

7B930101-EB91-4F4E-9B90-0861460DBD94.root
85999D7A-3836-6446-AED2-136D6FC874BA.root
BF8F0CF8-4DD5-904E-AD60-2E425E19EC6F.root


In [1]:
import uproot
import awkward as ak
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath('.')))
from framework import read_events
import json
with open('../data/common/forms.json') as file:
    forms = json.load(file)
events = read_events('/Users/giorgiopizzati/testcoffea/rootFiles/7B930101-EB91-4F4E-9B90-0861460DBD94.root', 0, 10_000, forms['mc'])

start reading
created events


In [9]:
import importlib

In [10]:
import variation as variation_module

In [11]:
importlib.reload(variation_module)

<module 'variation' from '/Users/giorgiopizzati/development/cern/my_processor/variation.py'>

In [12]:
from typing import NewType
from coffea.lookup_tools.correctionlib_wrapper import correctionlib_wrapper

correctionlib_evaluator = NewType('correctionlib_evaluator', any)

In [14]:
def format_rule(column, variation_name):
        tag = variation_name.split('_')[-1]
        if isinstance(column, str):
            return f"{column}_{tag}"
        elif isinstance(column, tuple):
            _list = list(column[:-1])
            _list.append(f"{column[-1]}_{tag}")
            return tuple(_list)
        else:
            print(
                "Cannot format varied column", column, "for variation", variation_name
            )
            raise Exception



In [15]:
@variation_module.vary(reads_columns=[('Jet', 'pt'), ('Jet', 'puId'), ('Jet', 'genJetIdx')])
def func(events: ak.Array, variations: variation_module.Variation, ceval_puid: correctionlib_evaluator, doVariations:bool=False):
    wrap_c = correctionlib_wrapper(ceval_puid["PUJetID_eff"])
    puId_shift = 1 << 2
    pass_puId = ak.values_astype(events.Jet.puId & puId_shift, bool)

    jet_genmatched = (events.Jet.genJetIdx >= 0) & (
        events.Jet.genJetIdx < ak.num(events.GenJet)
    )
    mask = jet_genmatched & pass_puId & (15.0 < events.Jet.pt) & (events.Jet.pt < 50.0)
    jets = ak.mask(events.Jet, mask)

    if not doVariations:
        sf = wrap_c(jets.eta, jets.pt, "nom", "L")
        sf = ak.fill_none(sf, 1.0)
        events[("Jet", "PUID_SF")] = sf
    else:
        sf_up = wrap_c(jets.eta, jets.pt, "up", "L")
        sf_down = wrap_c(jets.eta, jets.pt, "down", "L")

        sf_up = ak.fill_none(sf_up, 1.0)
        sf_down = ak.fill_none(sf_down, 1.0)

        events[("Jet", "PUID_SF_up")] = sf_up
        events[("Jet", "PUID_SF_down")] = sf_down
        variations.register_variation(columns=[('Jet', 'PUID_SF')], variation_name='PUID_SF_up', format_rule=format_rule)
        variations.register_variation(columns=[('Jet', 'PUID_SF')], variation_name='PUID_SF_down', format_rule=format_rule)

    return events, variations

In [16]:
def puid_sf(events, variations, ceval_puid):
    events, variations = func(events, variations, ceval_puid,  doVariations=False)
    # now doing variations

    events, variations = func(events, variations, ceval_puid, doVariations=True)

    return events, variations

In [17]:
import correctionlib

In [18]:
ceval_puid = correctionlib.CorrectionSet.from_file("/Users/giorgiopizzati/Downloads/jmar.json.gz")

In [19]:
import variation as variation_module
variations = variation_module.Variation()

In [20]:
import numpy as np

In [21]:
for variation in ['JESAbsolute', 'JESTotal']:
    for tag in ['up', 'down']:
        pt = ak.flatten(events.Jet.pt).to_numpy()
        pt = pt * np.random.normal(1.0, 0.1, size=len(pt))
        num = ak.num(events.Jet.pt)
        vname = f"{variation}_{tag}"
        events[('Jet', f"pt_{vname}")] = ak.unflatten(pt, num)
        variations.register_variation([('Jet', 'pt')], variation_name=vname)


In [22]:
events, variations = puid_sf(events, variations, ceval_puid)

[{GenPart: [{pt: 0, eta: ..., ...}, ...], GenJet: [{...}], ...}, ..., {...}]
<variation.Variation object at 0x282a35d50>
<correctionlib.highlevel.CorrectionSet object at 0x169242920>
[('GenPart', 'pt'), ('GenPart', 'eta'), ('GenPart', 'phi'), ('GenPart', 'mass'), ('GenPart', 'pdgId'), ('GenPart', 'status'), ('GenPart', 'genPartIdxMother'), ('GenPart', 'statusFlags'), ('GenJet', 'pt'), ('GenJet', 'eta'), ('GenJet', 'phi'), ('GenJet', 'mass'), ('Electron', 'pt'), ('Electron', 'eta'), ('Electron', 'phi'), ('Electron', 'pdgId'), ('Electron', 'mvaFall17V2Iso_WP90'), ('Electron', 'convVeto'), ('Electron', 'pfRelIso03_all'), ('Electron', 'cutBased'), ('Electron', 'sieie'), ('Electron', 'eInvMinusPInv'), ('Electron', 'dxy'), ('Electron', 'dz'), ('Muon', 'pt'), ('Muon', 'eta'), ('Muon', 'phi'), ('Muon', 'pdgId'), ('Muon', 'tightId'), ('Muon', 'pfRelIso04_all'), ('Muon', 'dxy'), ('Muon', 'dz'), ('Muon', 'genPartIdx'), ('Muon', 'nTrackerLayers'), ('fixedGridRhoFastjetAll',), ('HLT', 'Mu23_TrkIsoV

In [23]:
variations.variations_dict

{'JESAbsolute_up': [(('Jet', 'pt'), ('Jet', 'pt_JESAbsolute_up')),
  (('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_JESAbsolute_up'))],
 'JESAbsolute_down': [(('Jet', 'pt'), ('Jet', 'pt_JESAbsolute_down')),
  (('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_JESAbsolute_down'))],
 'JESTotal_up': [(('Jet', 'pt'), ('Jet', 'pt_JESTotal_up')),
  (('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_JESTotal_up'))],
 'JESTotal_down': [(('Jet', 'pt'), ('Jet', 'pt_JESTotal_down')),
  (('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_JESTotal_down'))],
 'PUID_SF_up': [(('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_up'))],
 'PUID_SF_down': [(('Jet', 'PUID_SF'), ('Jet', 'PUID_SF_down'))]}