In [1]:
%load_ext autoreload
%autoreload 2
import uproot
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak
import importlib
import coffea.util as util
import time
import json
import os

import sys
sys.path.append("/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/")
from analysisTools import Analyzer
from analysisTools import loadSchema
import analysisTools as tools
import analysisSubroutines as routines

### Input files to be analyzed
Coffea analyzer will analyze the ntuples that skimmed AOD. The skimmed ntuples information (file location, xsec, lumi, weight, etc) is available as sample config json files and the json files are given as input to the analyzer. 

The workflow is as follows:
1) `EDAnalyzer` format ntuplizer is run on AOD, saving useful branches: `/AODSkimmer/`
2) `rdataframe` format skimmer is run on the ntuples, applying basic cuts (MET filter, trigger, MET cut etc): `/python_analysis/condor/condor_skim_rdf.py`
3) `coffea` format analyzer is run on the skimmed files, applying the rest of the cuts. This step is done in this notebook. 

In [15]:
# update jsons from original ones in case cross section, number of files, etc has updated
sample_json_dir = './'

src = "/uscms/home/kyungmip/nobackup/CMSSW_10_6_26/src/iDMe/python_analysis/analysisTools/configs/sample_configs/"
jsons = [f for f in os.listdir(sample_json_dir) if ".json" in f]
for jf in jsons:
    if not os.path.exists(src+jf):
        print(f"Skipping {jf}, can't find original file")
        continue
    with open(src+jf,"r") as sf:
        source = json.load(sf)
    with open(jf,"r") as tf:
        targ = json.load(tf)
    for i,entry in enumerate(targ):
        src_entry = [k for k in source if k['name']==entry['name']][0]
        entry['xsec'] = src_entry['xsec']
        entry['sum_wgt'] = src_entry['sum_wgt']
        entry['nFiles'] = src_entry['nFiles']
        if 'num_events' in src_entry.keys():
            entry['num_events'] = src_entry['num_events']
    with open(jf,"w") as of:
        json.dump(targ,of,indent=4)

Skipping bkg_2018_Diboson.json, can't find original file
Skipping bkg_2018_DY.json, can't find original file
Skipping bkg_2018_QCD_TuneCP5_PSWeights.json, can't find original file
Skipping bkg_2018_Top.json, can't find original file
Skipping bkg_2018_Triboson.json, can't find original file
Skipping bkg_2018_TTJetsDiLept.json, can't find original file
Skipping bkg_2018_WJets.json, can't find original file
Skipping bkg_2018_ZJets.json, can't find original file
Skipping signal_v2_2018_aEM.json, can't find original file


In [16]:
cut_file = "./configs/cut_configs/SR_fromSkimmed_v1.py"
hist_file = "./configs/histo_configs/SR_studies.py"

In [17]:
outdir = "./coffea/"
os.system(f"mkdir -p {outdir}")

0

In [18]:
# Coffea output file name suffix

#suffix = "_ejdR_rejection_IDcut"
suffix = "_hist_cut8to11"

### Signal

In [19]:
signal_list = [ samp.split('.')[0] for samp in jsons if "signal" in samp ]
print(signal_list)

['signal_v2_2018_aEM']


In [20]:
for samp in signal_list:
    print(f'Start analyzing: {samp}')
    
    test = Analyzer(f"{samp}.json",hist_file,cut_file)
    
    t1 = time.time()
    out = test.process(execr='futures',lite=True)
    t2 = time.time()
    
    util.save(out, f"{outdir}/{samp}{suffix}.coffea")
    
    print('Completed in {:.2f} min\n'.format((t2-t1)/60))
    del out, test

Start analyzing: signal_v2_2018_aEM


Output()

Completed in 4.77 min



### Backgrounds

In [21]:
bkg_list = [ samp.split('.')[0] for samp in jsons if "bkg" in samp ]
print(bkg_list)

['bkg_2018_Diboson', 'bkg_2018_DY', 'bkg_2018_QCD_TuneCP5_PSWeights', 'bkg_2018_Top', 'bkg_2018_Triboson', 'bkg_2018_TTJetsDiLept', 'bkg_2018_WJets', 'bkg_2018_ZJets']


In [22]:
for samp in bkg_list:
    print(f'Start analyzing: {samp}')
    
    test = Analyzer(f"{samp}.json",hist_file,cut_file)
    
    t1 = time.time()
    out = test.process(execr='futures',lite=True)
    t2 = time.time()
    
    util.save(out, f"{outdir}/{samp}{suffix}.coffea")
    
    print('Completed in {:.2f} min\n'.format((t2-t1)/60))
    del out, test

Start analyzing: bkg_2018_Diboson


Output()

Completed in 0.59 min

Start analyzing: bkg_2018_DY


Output()

Completed in 0.53 min

Start analyzing: bkg_2018_QCD_TuneCP5_PSWeights


Output()

Completed in 1.23 min

Start analyzing: bkg_2018_Top


Output()

Completed in 3.63 min

Start analyzing: bkg_2018_Triboson


Output()

Completed in 0.23 min

Start analyzing: bkg_2018_TTJetsDiLept


Output()

Completed in 0.47 min

Start analyzing: bkg_2018_WJets


Output()

Completed in 3.11 min

Start analyzing: bkg_2018_ZJets


Output()

Completed in 3.21 min

