In [1]:
%load_ext autoreload
%autoreload 2
import uproot
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak
import importlib
import coffea.util as util
import time
import json
import os

import sys
sys.path.append("/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/")
from analysisTools import Analyzer
from analysisTools import loadSchema
import analysisTools as tools
import analysisSubroutines as routines

### Input files to be analyzed
The Coffea analyzer runs on ntuples that were skimmed from AOD. The information about the skimmed ntuples (file location, xsec, lumi, weight, etc.) is stored in sample-config JSON files, and these JSON files are given as input to the analyzer. 

The workflow is as follows:
1) An `EDAnalyzer`-format ntuplizer is run on AOD, saving the useful branches: `/AODSkimmer/`
2) An `rdataframe`-format skimmer is run on the ntuples, applying basic cuts (MET filter, trigger, MET cut, etc.): `/python_analysis/condor/condor_skim_rdf.py`
3) A `coffea`-format analyzer is run on the skimmed files, applying the rest of the cuts. This step is done in this notebook. 

In [2]:
# update jsons from original ones in case cross section, number of files, etc has updated
def update_sample_jsons(json_names, target_dir, source_dir):
    """Refresh local sample-config JSON files from their original copies.

    For every file name in ``json_names`` that exists in ``source_dir``, the
    file of the same name in ``target_dir`` is loaded, each of its entries is
    matched by ``'name'`` to an entry of the source file, and the fields
    ``xsec``, ``sum_wgt``, ``nFiles`` (and ``num_events`` when the source has
    it) are copied over. The target file is then rewritten with ``indent=4``.

    Parameters
    ----------
    json_names : iterable of str
        File names (not paths) of the sample configs to refresh.
    target_dir : str
        Directory holding the local copies that get rewritten.
    source_dir : str
        Directory holding the original sample configs.

    Notes
    -----
    Files without a counterpart in ``source_dir`` are skipped with a message.
    Target entries whose name does not appear in the source file are left
    unchanged (with a message) instead of aborting the whole refresh.
    """
    for jf in json_names:
        src_path = os.path.join(source_dir, jf)
        # Build the target path from target_dir so the update does not
        # silently depend on the current working directory.
        targ_path = os.path.join(target_dir, jf)
        if not os.path.exists(src_path):
            print(f"Skipping {jf}, can't find original file")
            continue
        with open(src_path, "r") as sf:
            source = json.load(sf)
        with open(targ_path, "r") as tf:
            targ = json.load(tf)

        # Index source entries by name once; setdefault keeps the first
        # occurrence when a name is duplicated in the source file.
        source_by_name = {}
        for src_entry in source:
            source_by_name.setdefault(src_entry['name'], src_entry)

        for entry in targ:
            src_entry = source_by_name.get(entry['name'])
            if src_entry is None:
                print(f"Skipping entry {entry['name']} in {jf}, not present in original file")
                continue
            entry['xsec'] = src_entry['xsec']
            entry['sum_wgt'] = src_entry['sum_wgt']
            entry['nFiles'] = src_entry['nFiles']
            # num_events is optional in the source configs.
            if 'num_events' in src_entry:
                entry['num_events'] = src_entry['num_events']

        with open(targ_path, "w") as of:
            json.dump(targ, of, indent=4)


sample_json_dir = './'

src = "/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/configs/sample_configs/"
# Only real JSON files: endswith avoids picking up e.g. "x.json.bak".
jsons = [f for f in os.listdir(sample_json_dir) if f.endswith(".json")]
update_sample_jsons(jsons, sample_json_dir, src)

Skipping bkg_2018_Diboson.json, can't find original file
Skipping bkg_2018_DY.json, can't find original file
Skipping bkg_2018_QCD_TuneCP5_PSWeights.json, can't find original file
Skipping bkg_2018_Top.json, can't find original file
Skipping bkg_2018_Triboson.json, can't find original file
Skipping bkg_2018_TTJetsDiLept.json, can't find original file
Skipping bkg_2018_WJets.json, can't find original file
Skipping bkg_2018_ZJets.json, can't find original file
Skipping signal_v2_2018_aEM.json, can't find original file
Skipping signal_2018_aEM_5p25.json, can't find original file
Skipping signal_2018_aEM_5p25_ct_1.json, can't find original file


In [3]:
# Cut configuration file; passed to Analyzer as its third argument in the processing cells.
cut_file = "./configs/cut_configs/SR_fromSkimmed_v1.py"
# Histogram configuration file; passed to Analyzer as its second argument in the processing cells.
hist_file = "./configs/histo_configs/SR_studies.py"

In [4]:
# Directory that receives the saved .coffea output files.
outdir = "./coffea/"
# Create it (including missing parents) without spawning a shell; a failure
# raises OSError instead of being hidden in an ignored exit status, and an
# already-existing directory is not an error.
os.makedirs(outdir, exist_ok=True)

0

In [5]:
# Coffea output file name suffix: appended to the sample name when the
# output is saved as f"{outdir}/{samp}{suffix}.coffea".

# Alternative suffix (currently disabled):
#suffix = "_ejdR_rejection_IDcut"
suffix = "_deltaR_nGoodVtx_finer_dR"

### Signal

In [6]:
# Collect the sample names (file name up to the first '.') of every sample
# config whose file name contains the signal tag.
signal_list = []
for json_name in jsons:
    if "signal_2018_aEM_5p25_ct_1" in json_name:
        signal_list.append(json_name.partition('.')[0])
print(signal_list)

['signal_2018_aEM_5p25_ct_1']


In [7]:
# Run the analyzer on each signal sample and save one .coffea file per sample.
for samp in signal_list:
    print(f'Start analyzing: {samp}')

    analyzer = Analyzer(f"{samp}.json", hist_file, cut_file)

    # Time only the processing step, not the construction or the save.
    t_start = time.time()
    result = analyzer.process(execr='futures', lite=True)
    elapsed_min = (time.time() - t_start) / 60

    util.save(result, f"{outdir}/{samp}{suffix}.coffea")

    print('Completed in {:.2f} min\n'.format(elapsed_min))
    # Drop the references before moving on to the next sample.
    del result, analyzer

Start analyzing: signal_2018_aEM_5p25_ct_1


Output()

Output()

concurrent.futures.process._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/uscms/home/kyungmip/nobackup/miniconda3/envs/coffea/lib/python3.8/site-packages/coffea/processor/executor.py", line 1654, in _work_function
    out = processor_instance.process(events)
  File "/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/analysisTools.py", line 240, in process
    routines.selectExistingGoodVtx(events)
  File "/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/analysisSubroutines.py", line 80, in selectExistingGoodVtx
    events["LptElectron","dPhij1"] = ak.where(j1_phi != -999,np.abs(deltaPhi(events.LptElectron.phi,j1_phi)),999)
  File "/uscms/home/kyungmip/nobackup/miniconda3/envs/coffea/lib/python3.8/site-packages/awkward/highlevel.py", line 1062, in __setitem__
    array = ak.operations.structure.with_field(self.layout, what, where)
  File "/uscms/home/kyungmip/nobackup/miniconda3/envs/coffea/lib/python3

Exception: Failed processing file: WorkItem(dataset='sig_2018_Mchi-5p25_dMchi-0p5_ctau-1', filename='root://cmsxrootd.fnal.gov//store/group/lpcmetx/iDMe/skimmed_ntuples/signal/signal_v2_2018_5p25_aEM_rdfSkim/output_sig_Mchi-5p25_dMchi-0p5_ct-1//ntuples_sig_Mchi-5.25_dMchi-0.5_ct-1_3.root', treename='ntuples/outT', entrystart=0, entrystop=709, fileuuid=b'\xfe\xaa\xa1\x8c_\xc9\x11\xee\xadu\x9d\xbc\xe1\x83\xbe\xef', usermeta={})

### Backgrounds

In [None]:
# Collect the sample names (file name up to the first '.') of every sample
# config whose file name contains "bkg".
bkg_list = []
for json_name in jsons:
    if "bkg" in json_name:
        bkg_list.append(json_name.partition('.')[0])
print(bkg_list)

In [None]:
# Run the analyzer on each background sample and save one .coffea file per sample.
for samp in bkg_list:
    print(f'Start analyzing: {samp}')

    analyzer = Analyzer(f"{samp}.json", hist_file, cut_file)

    # Time only the processing step, not the construction or the save.
    t_start = time.time()
    result = analyzer.process(execr='futures', lite=True)
    elapsed_min = (time.time() - t_start) / 60

    util.save(result, f"{outdir}/{samp}{suffix}.coffea")

    print('Completed in {:.2f} min\n'.format(elapsed_min))
    # Drop the references before moving on to the next sample.
    del result, analyzer