In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak
import sys
sys.path.append("../../analysisTools/")
from analysisTools import Analyzer
from analysisTools import loadSchema
import analysisTools as tools
import analysisSubroutines as routines
import importlib
import coffea.util as util
import time
import json
import os
import glob

## Some Information

### Input files to be analyzed

**Coffea can be run on either *unskimmed ntuples* or *skimmed ntuples*.**

- ***unskimmed ntuples***: the analysis uses AOD samples, so we run the ntuplizer + miniAOD step from [AODSkimmer](https://github.com/kyungminparkdrums/iDMe/tree/main/AODSkimmer) as condor jobs, saving only the branches that are needed in the analysis.

- ***skimmed ntuples***: produced from the *unskimmed ntuples* above by applying some basic preselections with `rdataframe` in condor jobs, using the scripts in [python_analysis/condor/](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/condor#condor-jobs-for-skimmer-that-applies-preselections-to-the-ntuples).

Note:
- Both unskimmed and skimmed ntuples are available in the `lpcmetx/iDMe/` area on the FNAL LPC EOS.
- You can also produce your own, following the README in `AODSkimmer/` (for unskimmed ntuples) and `python_analysis/condor/` (for skimmed ntuples). 

### Configs
The coffea analyzer requires config files for cut, histogram, and sample information. Check out the README for each of these; there you'll find information on how to add/edit cuts, histograms, etc.
- [cut config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#cut-configs-cut_configs)
- [histo config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#histo-configs-histo_configs)
- [sample config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#sample-configs-sample_configs)

## Run coffea on skimmed ntuples

In [2]:
# Config files shared by every Analyzer instance built in this notebook:
# the histogram definitions and the cut definitions.
histos_config = "histoConfig.py"
cuts_config = "./cut_configs/BDTv2_ctau-100.py"

In [3]:
# Top-level directory for the coffea outputs. makedirs(exist_ok=True) creates
# it only when missing, without a separate check-then-create step.
os.makedirs('coffea', exist_ok=True)

In [4]:
# Directory that receives the .coffea files written by the runs below.
# makedirs creates any missing parent directory as well, so this cell works
# even when "./coffea" does not exist yet, and is a no-op when outdir is
# already present.
outdir = "./coffea/skimmed/"
os.makedirs(outdir, exist_ok=True)

### Signal

In [5]:
# Sample config used for the signal run in the next cell.
# NOTE(review): this path is under ./samp_configs/, while the background
# configs further down are globbed from ../../configs/sample_configs/ —
# confirm that both locations are intended.
sample_config = (
    "./samp_configs/"
    "skimmed_sig_v2_egamma_hadd_ntuples_2018_aEM_ctau-100.json"
)

In [6]:
# Build the analyzer for the signal sample config; max_samples=-1 runs over
# all samples listed in the config.
analyzer = Analyzer(sample_config, histos_config, cuts_config, max_samples=-1)

# perf_counter is a monotonic clock, so the measured duration is not affected
# by system clock adjustments that happen while the job is running.
t1 = time.perf_counter()
result = analyzer.process(execr='futures')
t2 = time.perf_counter()

print("Runtime: {:.2f} minutes".format((t2-t1)/60))

# outdir already ends with "/", so the file name is attached with
# os.path.join instead of inserting a second separator by hand.
util.save(result, os.path.join(outdir, "signal_SR_BDTv2_ctau-100_5to50_skimmed.coffea"))

# Drop the analyzer and its output before moving on to the next run.
del result, analyzer

Runtime: 0.56 minutes


### Background

In [7]:
# Collect every skimmed background sample config. glob returns its matches in
# arbitrary (filesystem-dependent) order, so sort them to make the processing
# order below reproducible from run to run.
bkg_configs = sorted(glob.glob("../../configs/sample_configs/skimmed_bkg*.json"))

print(bkg_configs)

['../../configs/sample_configs/skimmed_bkg_2018_DY.json', '../../configs/sample_configs/skimmed_bkg_2018_Diboson.json', '../../configs/sample_configs/skimmed_bkg_2018_QCD_TuneCP5_PSWeights.json', '../../configs/sample_configs/skimmed_bkg_2018_TTJetsDiLept.json', '../../configs/sample_configs/skimmed_bkg_2018_Top.json', '../../configs/sample_configs/skimmed_bkg_2018_Triboson.json', '../../configs/sample_configs/skimmed_bkg_2018_WJets.json', '../../configs/sample_configs/skimmed_bkg_2018_ZJets.json']


In [8]:
# Run the analyzer once per background sample config and save each result to
# its own .coffea file in outdir.
for bkg in bkg_configs:
    # Process label: the file name without its directory, its ".json"
    # extension and the "skimmed_" prefix,
    # e.g. ".../skimmed_bkg_2018_DY.json" -> "bkg_2018_DY".
    process = os.path.splitext(os.path.basename(bkg))[0].split('skimmed_')[-1]
    print(process)

    # Test the label rather than the full path, so that only the file name
    # decides whether a sample is skipped.
    if 'TTJetsDiLept' in process:  # ntuples currently not available
        continue

    analyzer = Analyzer(bkg, histos_config, cuts_config)

    # Monotonic clock: the duration is not affected by system clock changes.
    t1 = time.perf_counter()
    result = analyzer.process(execr='futures')
    t2 = time.perf_counter()

    print("Runtime: {:.2f} minutes".format((t2-t1)/60))

    # outdir already ends with "/", so join instead of adding another "/".
    util.save(result, os.path.join(outdir, f"{process}_SR_BDTv2_ctau-100_5to50_skimmed.coffea"))

    # Drop the analyzer and its output before the next sample is processed.
    del result, analyzer

Runtime: 1.72 minutes
