In [1]:
import json
import uproot
import uproot_methods
import awkward
import numpy as np
%matplotlib nbagg

import fnal_column_analysis_tools.processor as processor
import fnal_column_analysis_tools.hist as hist
from fnal_column_analysis_tools.hist import plot
import matplotlib.pyplot as plt

In [2]:
# files.json maps each sample name to a list of file paths.
with open('files.json') as fin:
    files = json.load(fin)

# Show which samples are available (iterating a dict yields its keys).
print("\n".join(files))

# Prepend the XRootD URL prefix to each path; only the first file of every
# sample is kept (the [:1] slice).
redir = 'root://cmsxrootd.fnal.gov/'
samples = {
    name: [redir + path for path in paths[:1]]
    for name, paths in files.items()
}

GluGluHToBB_M125_13TeV_powheg_pythia8
GluGluHToBB_M125_13TeV_amcatnloFXFX_pythia8
GluGluHToBB_M125_LHEHpT_250-Inf_13TeV_amcatnloFXFX_pythia8
GluGluHToBB_M-125_13TeV_powheg_MINLO_NNLOPS_pythia8
VBFHToBB_M-125_13TeV_powheg_pythia8_weightfix


In [3]:
class HiggsGenComparison(processor.ProcessorABC):
    """Histogram the transverse momentum of selected generator-level Higgs bosons.

    The accumulator holds a single histogram under the key ``'hpt'`` with a
    categorical ``dataset`` axis and a ``pt`` axis of 120 bins from 0 to 1200.
    """

    def __init__(self):
        """Build the accumulator template that ``process`` copies for every chunk."""
        self._accumulator = processor.dict_accumulator({
            'hpt': hist.Hist(
                "Counts",
                hist.Cat("dataset", "Primary dataset"),
                hist.Bin("pt", r"$p_{T,h}$ [GeV]", 120, 0., 1200.),
            ),
        })

    @property
    def accumulator(self):
        """The accumulator template created in ``__init__``."""
        return self._accumulator

    def nanoObject(self, df, prefix):
        """Bundle every branch starting with ``prefix`` into one jagged table.

        Args:
            df: Dataframe-like object; ``df.available`` must iterate over
                byte-string branch names and ``df[name]`` must return the
                corresponding array.
            prefix: Common branch-name prefix, e.g. ``'GenPart_'``.

        Returns:
            The result of ``awkward.JaggedArray.zip`` with a ``p4`` column built
            from the ``pt``/``eta``/``phi``/``mass`` branches, plus one column
            per remaining prefixed branch, keyed by the name with the prefix
            stripped.
        """
        kinematic_names = [prefix + suffix for suffix in ('pt', 'eta', 'phi', 'mass')]

        # Branch names arrive as bytes; decode them before matching the prefix.
        extra_names = set()
        for raw_name in df.available:
            name = raw_name.decode('ascii')
            if name.startswith(prefix):
                extra_names.add(name)
        # The four kinematic branches are folded into the p4 column instead.
        extra_names.difference_update(kinematic_names)

        four_vectors = uproot_methods.TLorentzVectorArray.from_ptetaphim(
            *[df[name] for name in kinematic_names]
        )
        columns = {name[len(prefix):]: df[name] for name in extra_names}
        return awkward.JaggedArray.zip(p4=four_vectors, **columns)

    def process(self, df):
        """Fill the Higgs pT histogram for one chunk of events.

        Args:
            df: Dataframe-like chunk providing ``df['dataset']`` and the
                ``GenPart_*`` branches.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) with ``'hpt'``
            filled from the selected particles.
        """
        output = self.accumulator.identity()

        dataset = df['dataset']
        genp = self.nanoObject(df, 'GenPart_')

        # Keep particles with pdgId 25 whose statusFlags has bit 7 set.
        # NOTE(review): bit 7 is presumably the isHardProcess flag of the
        # NanoAOD statusFlags bitmask - confirm against the NanoAOD docs.
        is_higgs = genp['pdgId'] == 25
        has_flag = (genp['statusFlags'] & (1 << 7)).astype(bool)
        higgs = genp[is_higgs & has_flag]

        output['hpt'].fill(dataset=dataset, pt=higgs['p4'].pt.flatten())

        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator

In [None]:
# Run HiggsGenComparison over the 'Events' tree of every file in `samples`,
# using the futures executor with 8 workers and chunks of 500000.
output = processor.run_uproot_job(
    samples,
    treename='Events',
    processor_instance=HiggsGenComparison(),
    executor=processor.futures_executor,
    executor_args={'workers': 8},
    chunksize=500000,
)


Building chunk lists: 100%|██████████| 5/5 [00:04<00:00,  1.20it/s]
  0%|          | 0/11 [00:00<?, ?items/s]

working
working
working
working
working
working
working
working


In [None]:
# Draw the 'hpt' histogram in 1D, overlaying the entries of the 'dataset' axis.
plot.plot1d(output['hpt'], overlay='dataset')

In [None]:
# Value of the '_bytesread' entry scaled by 1e9 (GB if the value is in bytes).
# NOTE(review): this key is not set by HiggsGenComparison; presumably it is
# added by run_uproot_job - confirm.
output['_bytesread'].value/1e9