In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
import awkward as ak

import sys
sys.path.append("../../analysisTools/")
from analysisTools import Analyzer
from analysisTools import loadSchema
import analysisTools as tools
import analysisSubroutines as routines
import importlib
import coffea.util as util

import time
import json
import os
import glob

## Some Information

### Input files to be analyzed

**Coffea can be run on either *unskimmed ntuples* or *skimmed ntuples*.**

- ***unskimmed ntuples***: we use AOD samples for the analysis, so we run the [ntuplizer+miniAOD] from [AODSkimmer](https://github.com/kyungminparkdrums/iDMe/tree/main/AODSkimmer) in condor jobs, saving useful branches that will be used in the analysis. 

- ***skimmed ntuples***: produced from the *unskimmed ntuples* above by applying some basic preselections with `rdataframe` in condor jobs, using the scripts in [python_analysis/condor/](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/condor#condor-jobs-for-skimmer-that-applies-preselections-to-the-ntuples). 

Note:
- In the FNAL LPC eos `lpcmetx/iDMe/` area, both unskimmed and skimmed ntuples are available.
- You can also produce your own, following the README in `AODSkimmer/` (for unskimmed ntuples) and `python_analysis/condor/` (for skimmed ntuples). 

### Configs
The Coffea analyzer requires config files for cut, histogram, and sample information. Check out the README for each of these; there you'll find information on how to add/edit cuts, histograms, etc.
- [cut config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#cut-configs-cut_configs)
- [histo config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#histo-configs-histo_configs)
- [sample config](https://github.com/kyungminparkdrums/iDMe/tree/main/python_analysis/configs#sample-configs-sample_configs)

## Run coffea on skimmed ntuples

In [2]:
# Paths to the cut and histogram config files; both are passed to the
# Analyzer constructor in the run cells below.
cuts_config = "./configs/cut_configs/SR_BDT_OneBin.py"
histos_config = "./configs/histo_configs/histos_SR.py"

In [3]:
# Path to the BDT model config; passed to Analyzer as its fourth argument
# when the cuts make use of the BDT.
model_config = "./configs/models/BDT_inclusive_10Vars.json"

In [4]:
# Directory into which the .coffea outputs of the run cells below are saved.
outdir = "./coffea/skimmed/"
# Create the directory if needed; exist_ok=True makes re-running this cell safe.
os.makedirs(outdir, exist_ok=True)

### Signal

In [5]:
# Sample config for the signal run; consumed by the Analyzer built in the next cell.
sample_config = "./configs/sample_configs/skimmed_signal_v9_miniAOD_2018_aEM_noNJetCut.json"

In [None]:
# Build the analyzer from the sample, histogram, and cut configs, plus the
# BDT model config (needed when the cuts use the BDT).
analyzer = Analyzer(sample_config, histos_config, cuts_config, model_config) # If using BDT in cuts
#analyzer = Analyzer(sample_config, histos_config, cuts_config) # If not using BDT in cuts

# Time the processing step with a monotonic clock so that system clock
# adjustments during the run cannot skew the reported duration.
t1 = time.perf_counter()
out = analyzer.process(execr='futures')
t2 = time.perf_counter()

print("Runtime: {:.2f} minutes".format((t2-t1)/60))
# os.path.join avoids the doubled separator that f"{outdir}/..." produced,
# since outdir already ends in "/".
util.save(out, os.path.join(outdir, "example_signal_skimmed_SR.coffea"))

# Drop the names so the output and analyzer objects can be garbage-collected
# before the next run.
del out, analyzer

Output()

Output()

### Background

In [7]:
# Optional helper (disabled): uncomment to collect and print the sample
# config paths matching the skimmed_bkg*.json glob pattern.
#bkg_configs = glob.glob("./configs/sample_configs/skimmed_bkg*.json")
#print(bkg_configs)

In [8]:
# Sample config for the background run; overrides the signal sample_config
# and is consumed by the Analyzer built in the next cell.
sample_config = "./configs/sample_configs/skimmed_bkg_2018_v7_miniAOD_noNJetCut.json"

In [9]:
# Build the analyzer from the sample, histogram, and cut configs, plus the
# BDT model config (needed when the cuts use the BDT).
analyzer = Analyzer(sample_config, histos_config, cuts_config, model_config) # If using BDT in cuts

# Time the processing step with a monotonic clock so that system clock
# adjustments during the run cannot skew the reported duration.
t1 = time.perf_counter()
out = analyzer.process(execr='futures')
t2 = time.perf_counter()

print("Runtime: {:.2f} minutes".format((t2-t1)/60))
# os.path.join avoids the doubled separator that f"{outdir}/..." produced,
# since outdir already ends in "/".
util.save(out, os.path.join(outdir, "example_bkg_skimmed_SR.coffea"))

# Drop the names so the output and analyzer objects can be garbage-collected
# once this cell finishes.
del out, analyzer

Output()

Output()

Runtime: 4.72 minutes
