In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak
import sys
sys.path.append("../../analysisTools/")
from analysisTools import Analyzer
from analysisTools import loadSchema
import analysisTools as tools
import analysisSubroutines as routines
import importlib
import coffea.util as util
import time
import json
import os
import glob

### Input files to be analyzed

**Coffea can be run on either *unskimmed ntuples* or *skimmed ntuples*.**

- ***unskimmed ntuples***: we use AOD samples for the analysis, so we run the [ntuplizer+miniAOD] from [AODSkimmer](https://github.com/kyungminparkdrums/iDMe/tree/main/AODSkimmer) in condor jobs, saving useful branches that will be used in the analysis. 

- ***skimmed ntuples***: starting from the *unskimmed ntuples* above, we apply some basic preselections with `rdataframe` in condor jobs, using the scripts in [python_analysis/condor/](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/condor#condor-jobs-for-skimmer-that-applies-preselections-to-the-ntuples). 

Note:
- In the FNAL LPC eos `lpcmetx/iDMe/` area, both unskimmed and skimmed ntuples are available.
- You can also produce your own, following the README in `AODSkimmer/` (for unskimmed ntuples) and `python_analysis/condor/` (for skimmed ntuples). 

### Configs
The Coffea analyzer requires config files for cut, histogram, and sample information. Check out the README for each of these; there you'll find information on how to add/edit cuts, histograms, etc.
- [cut config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#cut-configs-cut_configs)
- [histo config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#histo-configs-histo_configs)
- [sample config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#sample-configs-sample_configs)

In [2]:
# Config files handed to the Analyzer for the unskimmed run:
# cut definitions, histogram definitions, and the list of samples.
cuts_config = "../../configs/cut_configs/SR_v2.py"
histos_config = "../../configs/histo_configs/SR_studies.py"
sample_config = "../../configs/sample_configs/sig_v2_egamma_hadd_2018_aEM.json"

In [3]:
# Directory that receives the .coffea output of the unskimmed run.
outdir = "./coffea/unskimmed/"
# makedirs (unlike mkdir) also creates the missing parent "./coffea", and
# exist_ok=True makes re-running this cell a no-op instead of an error.
os.makedirs(outdir, exist_ok=True)

In [None]:
# Build the analyzer from the sample, histogram and cut configs.
# max_samples=2: only run over the first 2 signal samples in the config file.
analyzer = Analyzer(sample_config, histos_config, cuts_config, max_samples=2)

# Take wall-clock timestamps around the processing step.
t_start = time.time()
result = analyzer.process(execr='futures')
t_stop = time.time()

print("Runtime: {:.2f} minutes".format((t_stop - t_start) / 60))

# Write the processor output into the output directory.
util.save(result, f"{outdir}/example_signal_v2_unskimmed_vtx_match_cutflow.coffea")

# Drop the references to the output and the analyzer once the file is saved.
del result, analyzer

Output()

## Run coffea on skimmed ntuples

In [2]:
# Cut and histogram configs used for the skimmed runs below.
# The cut config path is relative to the current working directory.
cuts_config = "./SR_v3_skimmed.py"
histos_config = "../../configs/histo_configs/SR_studies.py"

In [3]:
# Directory that receives the .coffea outputs of the skimmed runs.
outdir = "./coffea/skimmed/"
# makedirs (unlike mkdir) also creates the missing parent "./coffea", and
# exist_ok=True makes re-running this cell a no-op instead of an error.
os.makedirs(outdir, exist_ok=True)

### Signal

In [4]:
# Sample config for the skimmed signal run; path is relative to the current working directory.
sample_config = "./skimmed_sig_v2_egamma_hadd_ntuples_2018_aEM.json"

In [5]:
# Build the analyzer from the sample, histogram and cut configs.
# max_samples=-1: run over all samples in the config.
analyzer = Analyzer(sample_config, histos_config, cuts_config, max_samples=-1)

# Take wall-clock timestamps around the processing step.
t_start = time.time()
result = analyzer.process(execr='futures')
t_stop = time.time()

print("Runtime: {:.2f} minutes".format((t_stop - t_start) / 60))

# Write the processor output into the output directory.
util.save(result, f"{outdir}/signal_SR_v3_skimmed.coffea")

# Drop the references to the output and the analyzer once the file is saved.
del result, analyzer

Output()

Output()

Runtime: 2.90 minutes


### Background

In [6]:
# Collect every skimmed background sample config.
# glob.glob returns matches in a filesystem-dependent order, so sort them to
# keep the processing order (and the printed list) reproducible across machines.
bkg_configs = sorted(glob.glob("../../configs/sample_configs/skimmed_bkg*.json"))

print(bkg_configs)

['../../configs/sample_configs/skimmed_bkg_2018_DY.json', '../../configs/sample_configs/skimmed_bkg_2018_Diboson.json', '../../configs/sample_configs/skimmed_bkg_2018_QCD_TuneCP5_PSWeights.json', '../../configs/sample_configs/skimmed_bkg_2018_TTJetsDiLept.json', '../../configs/sample_configs/skimmed_bkg_2018_Top.json', '../../configs/sample_configs/skimmed_bkg_2018_Triboson.json', '../../configs/sample_configs/skimmed_bkg_2018_WJets.json', '../../configs/sample_configs/skimmed_bkg_2018_ZJets.json']


In [7]:
for bkg in bkg_configs:
    # Derive the process label from the config file name, e.g.
    # ".../skimmed_bkg_2018_DY.json" -> "bkg_2018_DY".
    file_name = bkg.split('/')[-1]
    stem = file_name.split('.json')[0]
    process = stem.split('skimmed_')[-1]
    print(process)

    # Ntuples for TTJetsDiLept are currently not available, so that config is
    # only listed above and not processed.
    if 'TTJetsDiLept' not in bkg:
        analyzer = Analyzer(bkg, histos_config, cuts_config)

        # Take wall-clock timestamps around the processing step.
        t_start = time.time()
        result = analyzer.process(execr='futures')
        t_stop = time.time()

        print("Runtime: {:.2f} minutes".format((t_stop - t_start) / 60))

        # One output file per background process.
        util.save(result, f"{outdir}/{process}_SR_v3_skimmed.coffea")
        del result, analyzer

bkg_2018_DY


Output()

Output()

Runtime: 0.87 minutes
bkg_2018_Diboson


Output()

Output()

Runtime: 0.53 minutes
bkg_2018_QCD_TuneCP5_PSWeights


Output()

Output()

Runtime: 2.41 minutes
bkg_2018_TTJetsDiLept
bkg_2018_Top


Output()

Output()

Runtime: 3.87 minutes
bkg_2018_Triboson


Output()

Output()

Runtime: 0.27 minutes
bkg_2018_WJets


Output()

Output()

Runtime: 3.08 minutes
bkg_2018_ZJets


Output()

Output()

Runtime: 3.02 minutes
