In [1]:
%matplotlib inline
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
import numpy as np

In [2]:
class DibosonProcessor(processor.ProcessorABC):
    def __init__(self):
        dataset_axis = hist.Cat("dataset", "MET and Third Lepton")
        muon_axis = hist.Bin("massT", "Transverse Mass", 50, 20, 2000)
        
        self._accumulator = processor.dict_accumulator({
            'massT': hist.Hist("Counts", dataset_axis, muon_axis),
            'cutflow': processor.defaultdict_accumulator(int)
        })
    
    @property
    def accumulator(self):
        return self._accumulator
    
    def process(self, df):
        output = self.accumulator.identity()
        
        dataset = df["dataset"]

        muons = JaggedCandidateArray.candidatesfromcounts(
                    df['nMuon'],
                    pt=df['Muon_pt'].content,
                    eta=df['Muon_eta'].content,
                    phi=df['Muon_phi'].content,
                    mass=df['Muon_mass'].content,
                    charge=df['Muon_charge'].content
                    )
        electrons = JaggedCandidateArray.candidatesfromcounts(
                    df['nElectron'],
                    pt=df['Electron_pt'].content,
                    eta=df['Electron_eta'].content,
                    phi=df['Electron_phi'].content,
                    mass=df['Electron_mass'].content,
                    charge=df['Electron_charge'].content
                    )
        MET_pt = df['MET_pt']
        MET_phi = df['MET_phi']      

        # This function gets us a Boolean array which highlights the mass in a sublist that is closest to 91.2
        def closest(masses):
            delta = abs(91.2 - masses)
            closest_masses = delta.min()
            is_closest = (delta == closest_masses)
            return is_closest

        # This function calculates the transverse mass of MET and the third lepton in a triplet.
        def transverse(MET_pts, MET_phis, triplets):
            third_leptons = triplets.i1
            theta = abs(MET_phis - third_leptons.phi)
            massT = 2 * MET_pts * third_leptons.pt * (1 - np.cos(theta))
            return massT
        
        output['cutflow']['total muons'] = muons.counts.sum()
        output['cutflow']['total electrons'] = electrons.counts.sum()
        
        # Selects events that have exactly 3 leptons. We have to keep muons and electrons separate.
        three_leptons = (muons.counts + electrons.counts) == 3
        muons_3lep = muons[three_leptons]
        electrons_3lep = electrons[three_leptons]

        output['cutflow']['muons in trilepton system'] = muons_3lep.counts.sum()
        output['cutflow']['electrons in trilepton system'] = electrons_3lep.counts.sum()
        
        # There are four possible combinations of 3 leptons: (ee)m (mm)e (ee)e (mm)m
        # eem and mme are easy: we choose two from e's and m's, and cross it with the other.
        # Now we have a single ((e, e), m), a tuple with a pair and an extra lepton.
        eem = electrons_3lep.choose(2).cross(muons_3lep)
        mme = muons_3lep.choose(2).cross(electrons_3lep)

        # eee and mmm are harder. We want events with exactly 3 e's or 3 m's. Choose two, cross with self, get ((e, e), e).
        eee = electrons_3lep[electrons_3lep.counts == 3]
        eee = eee.choose(2).cross(eee)
        # Unfortunately, this has redundant elements: ((1, 2), 1) for instance. So we check that the 3rd lepton is not in pair.
        # This will get us 3 tuples per event, with each possible combination.
        eee = eee[np.not_equal(eee.i0.i0.p4, eee.i1.p4) & np.not_equal(eee.i0.i1.p4, eee.i1.p4)]
        mmm = muons_3lep[muons_3lep.counts == 3]
        mmm = mmm.choose(2).cross(mmm)
        mmm = mmm[np.not_equal(mmm.i0.i0.p4, mmm.i1.p4) & np.not_equal(mmm.i0.i1.p4, mmm.i1.p4)]
        
        # We want the MET as well, so we have to get the MET for the leptons we're selecting above (with only 3 total)
        MET_pt = MET_pt[three_leptons]
        MET_phi = MET_phi[three_leptons]
        
        # Check opposite charge.
        good_eem = eem[eem.i0.i0.charge != eem.i0.i1.charge]
        # The opposite charge check has empties, whereas our MET does not. In order to remove MET values associated
        # with empty eem's, we get a mask of not-empties and mask both eem and MET_eem with it.
        eem_notempty = good_eem.counts > 0
        final_eem = good_eem[eem_notempty]
        MET_pt_eem = MET_pt[eem_notempty]
        MET_phi_eem = MET_phi[eem_notempty]

        good_mme = mme[mme.i0.i0.charge != mme.i0.i1.charge]
        mme_notempty = good_mme.counts > 0
        final_mme = good_mme[mme_notempty]
        MET_pt_mme = MET_pt[mme_notempty]
        MET_phi_mme = MET_phi[mme_notempty]

        # For eee and mmm, the process is slightly different again. We can get the MET strictly for the 3e and 3m events.
        MET_pt_3e = MET_pt[electrons_3lep.counts == 3]
        MET_phi_3e = MET_phi[electrons_3lep.counts == 3]
        MET_pt_3m = MET_pt[muons_3lep.counts == 3]
        MET_phi_3m = MET_phi[muons_3lep.counts == 3]
        
        # Again, check that they have opposite charge.
        good_eee = eee[eee.i0.i0.charge != eee.i0.i1.charge]
        good_mmm = mmm[mmm.i0.i0.charge != mmm.i0.i1.charge]

        # Use the closest function above to see which of the 3 triplets in an event is closest to a Z.
        closest_eee = good_eee[closest(good_eee.i0.mass)]
        # Same process as above for eem, getting not-empty mask and applying it.
        eee_notempty = closest_eee.counts > 0
        final_eee = closest_eee[eee_notempty]
        MET_pt_eee = MET_pt_3e[eee_notempty]
        MET_phi_eee = MET_phi_3e[eee_notempty]

        closest_mmm = good_mmm[closest(good_mmm.i0.mass)]
        mmm_notempty = closest_mmm.counts > 0
        final_mmm = closest_mmm[mmm_notempty]
        MET_pt_mmm = MET_pt_3m[mmm_notempty]
        MET_phi_mmm = MET_phi_3m[mmm_notempty]
        
        output['cutflow']['final eee combinations'] = final_eee.counts.sum()
        output['cutflow']['final mmm combinations'] = final_mmm.counts.sum()
        output['cutflow']['final eem combinations'] = final_eem.counts.sum()
        output['cutflow']['final mme combinations'] = final_mme.counts.sum()
        
        # Use the transverse function above to calculate the transverse mass between MET and each combination.
        eem_massT = transverse(MET_pt_eem, MET_phi_eem, final_eem).content
        mme_massT = transverse(MET_pt_mme, MET_phi_mme, final_mme).content
        eee_massT = transverse(MET_pt_eee, MET_phi_eee, final_eee).content
        mmm_massT = transverse(MET_pt_mmm, MET_phi_mmm, final_mmm).content
        
        # Put together all the transverse masses into one array.
        massT = np.concatenate((eem_massT, mme_massT, eee_massT, mmm_massT))
        
        output['massT'].fill(dataset=dataset, massT=massT)
        return output

    def postprocess(self, accumulator):
        return accumulator

In [3]:
fileset = {'massT': ["data/Run2012B_SingleMu.root"]}

output = processor.run_uproot_job(fileset,
                                 treename='Events',
                                 processor_instance=DibosonProcessor(),
                                 executor=processor.futures_executor,
                                 executor_args={'workers':4},
                                 chunksize = 1000000)

Preprocessing: 100%|██████████| 1/1 [00:00<00:00, 57.02it/s]
Processing:   0%|          | 0/54 [00:00<?, ?items/s]


TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Internal error at <numba.typeinfer.ArgConstraint object at 0x7fdd6fb11208>:
--%<----------------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/errors.py", line 627, in new_error_context
    yield
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/typeinfer.py", line 201, in __call__
    assert ty.is_precise()
AssertionError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/typeinfer.py", line 144, in propagate
    constraint(typeinfer)
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/typeinfer.py", line 202, in __call__
    typeinfer.add_type(self.dst, ty, loc=self.loc)
  File "/cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-54b64/x86_64-slc6-gcc62-opt/lib/python3.6/contextlib.py", line 99, in __exit__
    self.gen.throw(type, value, traceback)
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/errors.py", line 635, in new_error_context
    six.reraise(type(newerr), newerr, tb)
  File "/home/hep/madamec/.local/lib/python3.6/site-packages/numba/six.py", line 659, in reraise
    raise value
numba.errors.InternalError: 
[1] During: typing of argument at /home/hep/madamec/.local/lib/python3.6/site-packages/awkward/numba/array/jagged.py (254)
--%<----------------------------------------------------------------------------


File "../../.local/lib/python3.6/site-packages/awkward/numba/array/jagged.py", line 254:
    def _getitem_impl(self, newwhere):
        return self[newwhere]
        ^

This error may have been caused by the following argument(s):
- argument 0: cannot determine Numba type of <class 'uproot_methods.classes.TLorentzVector.PtEtaPhiMassLorentzVectorArray'>

This is not usually a problem with Numba itself but instead often caused by
the use of unsupported features or an issue in resolving types.

To see Python/NumPy features supported by the latest release of Numba visit:
http://numba.pydata.org/numba-doc/dev/reference/pysupported.html
and
http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html

For more information about typing errors and how to debug them visit:
http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile

If you think your code should work with Numba, please report the error message
and traceback, along with a minimal reproducer at:
https://github.com/numba/numba/issues/new


In [None]:
hist.plot1d(output['massT'], overlay='dataset', fill_opts={'edgecolor': (0,0,0,0.3), 'alpha': 0.8})

In [None]:
for key, value in output['cutflow'].items():
    print(key, value)