In [None]:
import time

from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from awkward import JaggedArray
import uproot_methods
from uproot_methods import *
import numpy as np
import glob as glob
import itertools
import json
import uproot_methods
import copy

In [None]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class JetMassProcessor(processor.ProcessorABC):
    """Coffea processor filling reco-jet kinematics and RECO/GEN response histograms.

    For each chunk it builds reco fat jets, gen AK8 jets and gen AK8 subjets
    from the flat NanoAOD-style branches, derives a groomed ("soft-drop") gen
    mass from the gen subjets found near each gen jet, matches every reco jet
    to its nearest gen jet, and fills histograms from the matched pairs.
    """

    def __init__(self):
        """Declare the histogram axes and the accumulator returned by ``process``."""
        dataset_axis = hist.Cat("dataset", "Primary dataset")

        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]",
                           np.array([200,240,300,360,400,450,500]))
        # This axis bins jet mass (plain and soft-drop), so label it as a mass.
        m_axis = hist.Bin("m", r"$m$ [GeV]", np.array([0,5,10,20,30,40,50,60,80,100,500]))
        r_axis = hist.Bin("r", "RECO / GEN response", 100, 0, 2)
        dr_axis = hist.Bin("dr", r"$\Delta r$", 80, 0, 0.8)

        self._accumulator = processor.dict_accumulator({
            'pt':hist.Hist("Counts", dataset_axis, pt_axis),
            'm':hist.Hist("Counts", dataset_axis, m_axis),
            'msd':hist.Hist("Counts", dataset_axis, m_axis),
            'pt_v_m':hist.Hist("Counts", dataset_axis, pt_axis, m_axis ),
            'pt_v_msd':hist.Hist("Counts", dataset_axis, pt_axis, m_axis ),
            'dr':hist.Hist("Counts", dataset_axis, dr_axis),
            'r_pt_ptvm':hist.Hist("Counts", dataset_axis, pt_axis, m_axis, r_axis),
            'r_m_ptvm':hist.Hist("Counts", dataset_axis, pt_axis, m_axis, r_axis),
            'r_msd_ptvmsd':hist.Hist("Counts", dataset_axis, pt_axis, m_axis, r_axis),
            'cutflow': processor.defaultdict_accumulator(int)
        })

    @property
    def accumulator(self):
        """The accumulator template built in ``__init__``."""
        return self._accumulator

    def process(self, df):
        """Fill and return a fresh accumulator for one chunk of events.

        ``df`` is indexed by branch name and must provide 'dataset' plus the
        FatJet_*, GenJetAK8_* and SubGenJetAK8_* branches (and their counts)
        read below.
        """
        output = self.accumulator.identity()

        dataset = df['dataset']
        Jets = JaggedCandidateArray.candidatesfromcounts(
            df['nFatJet'],
            pt=df['FatJet_pt'],
            eta=df['FatJet_eta'],
            phi=df['FatJet_phi'],
            mass=df['FatJet_mass'],
            # Negative soft-drop masses are replaced by 0.
            msoftdrop=np.where( df['FatJet_msoftdrop'] >= 0,df['FatJet_msoftdrop'],0),
            jetId=df['FatJet_jetId']
            )
        GenJets = JaggedCandidateArray.candidatesfromcounts(
            df['nGenJetAK8'],
            pt=df['GenJetAK8_pt'],
            eta=df['GenJetAK8_eta'],
            phi=df['GenJetAK8_phi'],
            mass=df['GenJetAK8_mass']
            )
        GenSubJets = JaggedCandidateArray.candidatesfromcounts(
            df['nSubGenJetAK8'],
            pt=df['SubGenJetAK8_pt'],
            eta=df['SubGenJetAK8_eta'],
            phi=df['SubGenJetAK8_phi'],
            mass=df['SubGenJetAK8_mass']
            )

        # Match gen <---> gen subjets: pair every gen jet with every gen subjet
        # of the event and keep the subjets within delta-R < 0.8 of the jet.
        gensubpairs = GenJets.cross( GenSubJets, nested=True )
        gensubjetmetric = gensubpairs.i0.p4.delta_r(gensubpairs.i1.p4)
        dr_cut = (gensubjetmetric < 0.8)
        gensubpairs = gensubpairs[dr_cut]
        gensubjets = gensubpairs.i1
        # Summed four-vector of the retained subjets, used as the groomed gen jet.
        gengroomed = gensubjets.p4.sum()
        # Add the groomed p4 and mass to the GenJet table
        GenJets.add_attributes( sdp4=gengroomed )
        GenJets.add_attributes( msoftdrop=gengroomed.mass )

        # Require at least one reco jet that passes jet id
        output['cutflow']['all events'] += Jets.size
        jetId_cut = (Jets.jetId > 0)
        Jets = Jets[jetId_cut]
        output['cutflow']['>=1 with loose id'] += jetId_cut.any().sum()
        oneJet = (Jets.counts >=1)
        Jets = Jets[oneJet]
        GenJets = GenJets[oneJet]

        # Match reco <---> gen: for each reco jet keep the gen jet selected by
        # argmin of delta-R (presumably the nearest one), and only if that
        # delta-R is below 0.8.
        recogenpairs = Jets.cross(GenJets, nested=True)
        metric = recogenpairs.i0.p4.delta_r( recogenpairs.i1.p4 )
        index_of_minimized = metric.argmin()
        dr_cut2 = (metric[index_of_minimized] < 0.8)
        recogenpairs = recogenpairs[index_of_minimized][dr_cut2]

        # Make some plots
        ptreco = recogenpairs.i0.pt.flatten().flatten()
        ptgen = recogenpairs.i1.pt.flatten().flatten()
        mreco = recogenpairs.i0.mass.flatten().flatten()
        msdreco = recogenpairs.i0.msoftdrop.flatten().flatten()
        mgen = recogenpairs.i1.mass.flatten().flatten()
        msdgen = recogenpairs.i1.msoftdrop.flatten().flatten()
        # delta-R of the matched pairs, recomputed on the selected pairs so it
        # lines up entry-by-entry with the flattened quantities above.
        drmatched = recogenpairs.i0.p4.delta_r( recogenpairs.i1.p4 ).flatten().flatten()
        # Responses default to 0 where the gen-level denominator is not positive.
        ptreco_over_ptgen = np.where( ptgen > 0, ptreco/ptgen, 0)
        mreco_over_mgen = np.where( mgen > 0, mreco/mgen, 0)
        msdreco_over_msdgen = np.where( msdgen > 0, msdreco/msdgen, 0)

        output['pt'].fill(dataset=dataset,pt=ptreco)
        output['m'].fill(dataset=dataset,m=mreco)
        output['msd'].fill(dataset=dataset,m=msdreco)
        output['pt_v_m'].fill(dataset=dataset,pt=ptreco,m=mreco)
        output['pt_v_msd'].fill(dataset=dataset,pt=ptreco,m=msdreco)
        # The 'dr' histogram was declared but never filled before.
        output['dr'].fill(dataset=dataset,dr=drmatched)
        output['r_pt_ptvm'].fill(dataset=dataset,pt=ptgen,m=mgen,r=ptreco_over_ptgen)
        output['r_m_ptvm'].fill(dataset=dataset,pt=ptgen,m=mgen,r=mreco_over_mgen)
        output['r_msd_ptvmsd'].fill(dataset=dataset,pt=ptgen,m=msdgen,r=msdreco_over_msdgen)

        return output


    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator


In [None]:
# Can grab a file on cmslpc from 
# /store/group/lpctlbsm/NanoAODJMAR_2019_V1/Production/CRAB/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/DYJetsToLLM-50TuneCUETP8M113TeV-madgraphMLM-pythia8RunIISummer16MiniAODv3-PUMoriond17_ext2-v2/190513_171710/0000/*.root

#infiles = glob.glob('/mnt/data/cms/store/group/lpctlbsm/NanoAODJMAR_2019_V1/Production/CRAB/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/DYJetsToLLM-50TuneCUETP8M113TeV-madgraphMLM-pythia8RunIISummer16MiniAODv3-PUMoriond17_ext2-v2/190513_171710/0000/*.root')
infiles = glob.glob('/mnt/data/cms/store/group/lpctlbsm/NanoAODJMAR_2019_V1/Production/CRAB/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/DYJetsToLLM-50TuneCUETP8M113TeV-madgraphMLM-pythia8RunIISummer16MiniAODv3-PUMoriond17_ext2-v2/190513_171710/*.root')

# Stop here with a clear message if the pattern matched nothing, rather than
# launching the job on an empty file list.
if not infiles:
    raise FileNotFoundError("No input ROOT files matched the glob pattern")

# Map of dataset name -> list of input files handed to the job runner.
fileset = {"DY":infiles}

# Time the whole job so the cost of this cell is visible in its output.
tstart = time.time()
output = processor.run_uproot_job(fileset,
                                  treename='Events',
                                  processor_instance=JetMassProcessor(),
                                  executor=processor.futures_executor,
                                  executor_args={'workers':4, 'flatten': True},
                                  chunksize=500000,
                                 )


elapsed = time.time() - tstart
print(output)
# The elapsed time was previously computed but never shown.
print("Processed %d files in %.1f s" % (len(infiles), elapsed))

HBox(children=(IntProgress(value=0, description='Preprocessing', max=1, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Processing', max=194, style=ProgressStyle(description_width='…

  return self._trymemo("mass", lambda self: self.awkward.numpy.sqrt(self.mag2))
  return self._trymemo("mass", lambda self: self.awkward.numpy.sqrt(self.mag2))
  return self._trymemo("mass", lambda self: self.awkward.numpy.sqrt(self.mag2))
  return self._trymemo("mass", lambda self: self.awkward.numpy.sqrt(self.mag2))


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Global matplotlib font sizes
# (recipe from https://stackoverflow.com/questions/3899980/how-to-change-the-font-size-on-a-matplotlib-plot)
SMALL_SIZE = 14
MEDIUM_SIZE = 18
BIGGER_SIZE = 24

# (rc group, settings) pairs, applied in the order listed.
for rc_group, rc_settings in (
    ('font',   {'size': SMALL_SIZE}),        # default text size
    ('axes',   {'titlesize': SMALL_SIZE}),   # axes title
    ('axes',   {'labelsize': MEDIUM_SIZE}),  # x and y axis labels
    ('xtick',  {'labelsize': SMALL_SIZE}),   # x tick labels
    ('ytick',  {'labelsize': SMALL_SIZE}),   # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),    # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),  # figure title
):
    plt.rc(rc_group, **rc_settings)


In [None]:
#H2 = H.reshape( (len(ptbins)-1)*(len(mbins)-1), (len(ptbins)-1)*(len(mbins)-1) )

In [None]:
#plt.imshow(H2)

In [None]:
# Draw the 'pt' histogram, one overlaid curve per dataset, on a log y scale.
fig, ax = hist.plotgrid(output['pt'], overlay="dataset", stack=False)
plt.yscale("log")
# Re-run y autoscaling on every panel after switching to the log scale.
for panel in ax.flat:
    panel.autoscale(axis='y')
fig.show()

In [None]:
# Draw the 'm' histogram, one overlaid curve per dataset, on a log y scale.
fig, ax = hist.plotgrid(output['m'], overlay="dataset", stack=False)
plt.yscale("log")
# Re-run y autoscaling on every panel after switching to the log scale.
for panel in ax.flat:
    panel.autoscale(axis='y')
fig.show()

In [None]:
# One response plot per (pt, m) bin of the 'r_pt_ptvm' histogram.
response_all = output['r_pt_ptvm']
# The projection over 'dataset' does not depend on the bin, so do it once.
response_summed = response_all.project('dataset')
for pt_bin in response_all.axis('pt'):
    # The pt slice is shared by every mass bin of this row; compute it once per pt bin.
    response_in_pt = response_summed.project('pt', the_slice=pt_bin)
    for m_bin in response_all.axis('m'):
        fig, ax = hist.plotgrid( response_in_pt.project('m', the_slice=m_bin), stack=False)
        # The title quotes the upper edge of the pt and m bins.
        plt.title("pt = %5.0f  m = %5.0f" % (pt_bin.hi, m_bin.hi) )

