In [1]:
import time

from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from awkward import JaggedArray
import numpy as np
import glob as glob
import itertools
import json
import uproot_methods
import copy

In [2]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class JetMassProcessor(processor.ProcessorABC):
    """Minimal coffea processor used to reproduce a failing nested ``cross``.

    For each chunk it builds candidate arrays from the ``FatJet_*``,
    ``GenJetAK8_*`` and ``SubGenJetAK8_*`` branches, adds ``Jets.size`` to the
    ``'all events'`` cutflow counter, and then pairs ``Jets`` with ``GenJets``
    twice: once with a plain ``cross`` and once with ``cross(..., nested=True)``.
    In the run recorded in this notebook the plain call succeeds and the job
    then fails with ``ValueError: no column named 'p4'``.
    """

    def __init__(self):
        # The only accumulated quantity is a cutflow counter; entries are
        # created on first use through the ``int`` default factory.
        self._accumulator = processor.dict_accumulator({
            'cutflow': processor.defaultdict_accumulator(int)
        })

    @property
    def accumulator(self):
        """Accumulator template; ``process`` derives its output from it."""
        return self._accumulator

    def process(self, df):
        """Process one chunk and return its accumulator.

        Args:
            df: Chunk object indexed by branch name (``df['nFatJet']``,
                ``df['FatJet_pt']``, ...).

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) whose
            ``cutflow['all events']`` entry was incremented by ``Jets.size``.

        Raises:
            ValueError: Observed in the recorded run ("no column named 'p4'")
                when the nested ``cross`` below is evaluated.
        """
        output = self.accumulator.identity()

        Jets = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt=df['FatJet_pt'],
            eta=df['FatJet_eta'],
            phi=df['FatJet_phi'],
            mass=df['FatJet_mass'],
            msoftdrop=df['FatJet_msoftdrop'],
            jetId=df['FatJet_jetId']
            )
        GenJets = JaggedCandidateArray.candidatesfromcounts(
            df['nGenJetAK8'],
            pt=df['GenJetAK8_pt'],
            eta=df['GenJetAK8_eta'],
            phi=df['GenJetAK8_phi'],
            mass=df['GenJetAK8_mass']
            )
        # NOTE(review): GenSubJets is built but not used further down; it is
        # kept so the same branches are read as before.
        GenSubJets = JaggedCandidateArray.candidatesfromcounts(
            df['nSubGenJetAK8'],
            pt=df['SubGenJetAK8_pt'],
            eta=df['SubGenJetAK8_eta'],
            phi=df['SubGenJetAK8_phi'],
            mass=df['SubGenJetAK8_mass']
            )

        # presumably Jets.size is the number of events in the chunk -- TODO confirm
        output['cutflow']['all events'] += Jets.size

        # Flat pairing of every Jets entry with every GenJets entry per event.
        pairing1 = Jets.cross(GenJets)
        print("Hey that worked!")
        # Same pairing with nested=True. This is the call under investigation:
        # in the recorded output the message below is never printed, i.e. the
        # exception is raised before the following print is reached.
        pairing2 = Jets.cross(GenJets, nested=True)
        print("But this one breaks")

        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator


In [3]:


# Remote (xrootd) alternative to the local files globbed below; kept for reference.
#infiles = [
#'root://cmsxrootd.fnal.gov//cms/store/group/lpctlbsm/NanoAODJMAR_2019_V1/Production/CRAB/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/DYJetsToLLM-50TuneCUETP8M113TeV-madgraphMLM-pythia8RunIISummer16MiniAODv3-PUMoriond17_ext2-v2/190513_171710/0000/nano102x_on_mini94x_2016_mc_NANO_1.root'
#]
# glob returns matches in a filesystem-dependent order, so sort them to make
# the slice below select the same files on every run.
infiles = sorted(glob.glob("/mnt/data/cms/store/group/lpctlbsm/NanoAODJMAR_2019_V1/Production/CRAB/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/DYJetsToLLM-50TuneCUETP8M113TeV-madgraphMLM-pythia8RunIISummer16MiniAODv3-PUMoriond17_ext2-v2/190513_171710/0000/nano102x_on_mini94x_2016_mc_NANO_*.root"))

# Restrict the job to the first three matched files under the "DY" dataset key.
fileset = {"DY": infiles[0:3]}

tstart = time.time()
output = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=JetMassProcessor(),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 1, 'flatten': True},
                                  chunksize=500000,
                                 )

# Wall-clock time of the whole job, including preprocessing.
elapsed = time.time() - tstart
print(output)
print("Elapsed: %.1f s" % elapsed)

Preprocessing: 100%|██████████| 1/1 [00:00<00:00, 20.14it/s]
Processing:   0%|          | 0/3 [00:00<?, ?items/s]

Hey that worked!


Processing:   0%|          | 0/3 [00:01<?, ?items/s]


Hey that worked!
Hey that worked!


ValueError: no column named 'p4'