# Compute the LateX cutflow tables
- Will load the pkl files that contain the cutflows and the sumgenweight
- Will scale the events by the cross section
- Will save the yields in a dictionnary called ```cutflows -> Dict()```
- Will make the LateX table using the function ```make_composition_table()```

In [1]:
import glob
import json
import os
import pickle as pkl
import sys

import hist as hist2
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import pandas as pd
import pyarrow
import pyarrow.parquet as pq
import yaml
from scipy.special import softmax
from sklearn.metrics import auc, roc_curve

sys.path
sys.path.append("../python/")

import utils

plt.style.use(hep.style.CMS)
plt.rcParams.update({"font.size": 20})

pd.options.mode.chained_assignment = None

import glob
import os
import json
import pickle
import yaml
import math

import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import pyarrow.parquet as pq
from sklearn.metrics import auc, roc_curve
from scipy.special import softmax

import hist as hist2
import matplotlib.pyplot as plt
import mplhep as hep

plt.style.use(hep.style.CMS)

import sys
sys.path
sys.path.append("../python/")

import utils

plt.rcParams.update({"font.size": 20})

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# get lumi
with open("../fileset/luminosity.json") as f:
    luminosity = json.load(f)
    
luminosity

{'ele': {'Run2': 137640.0,
  '2016APV': 19492.72,
  '2016': 16809.96,
  '2017': 41476.02,
  '2018': 59816.23},
 'mu': {'Run2': 137640.0,
  '2016APV': 19436.16,
  '2016': 16810.81,
  '2017': 41475.26,
  '2018': 59781.96},
 'lep': {'Run2': 137640.0,
  '2016APV': 19436.16,
  '2016': 16810.81,
  '2017': 41475.26,
  '2018': 59781.96},
 'had': {'Run2': 137640.0,
  '2016APV': 19436.16,
  '2016': 16810.81,
  '2017': 41475.26,
  '2018': 59781.96}}

In [4]:
def get_lumi(years, channels):
    lum_ = 0
    for year in years:
        lum = 0
        for ch in channels:
            lum += luminosity[ch][year] / 1000.0

        lum_ += lum / len(channels)    
    return lum_

# Read cutflows from pkl

In [5]:
def get_sum_sumgenweight(pkl_files, year, sample):
    """Load and sum the sumgenweight of each pkl file."""
    
    sum_sumgenweight = 0
    for ifile in pkl_files:
        with open(ifile, "rb") as f:
            metadata = pkl.load(f)            
        sum_sumgenweight = sum_sumgenweight + metadata[sample][year]["sumgenweight"]

    return sum_sumgenweight


def get_xsecweight(pkl_files, year, ch, sample, is_data):
    
    if not is_data:
        # find xsection
        f = open("../fileset/xsec_pfnano.json")
        xsec = json.load(f)
        f.close()
        try:
            xsec = eval(str((xsec[sample])))
        except ValueError:
            print(f"sample {sample} doesn't have xsecs defined in xsec_pfnano.json so will skip it")
            return None

        # get overall weighting of events.. each event has a genweight...
        # sumgenweight sums over events in a chunk... sum_sumgenweight sums over chunks
        xsec_weight = (xsec * luminosity[ch][year]) / get_sum_sumgenweight(pkl_files, year, sample)
    else:
        xsec_weight = 1
    return xsec_weight

def get_cutflow(pkl_files, year, ch, sample, is_data):
    """
    Get cutflow from metadata but multiply by xsec-weight
    """
    xsec_weight = get_xsecweight(pkl_files, year, ch, sample, is_data)
        
    cuts = [
        "sumgenweight",
        "Trigger",
        "METFilters",
        "OneLep",
        "NoTaus",
        "AtLeastOneFatJet",
        "CandidateJetpT",
        "LepInJet",
        "JetLepOverlap",
        "dPhiJetMET",
        "MET",
    ]
        
    if year == "2018":
        cuts += ["HEMCleaning"]
        
    evyield = dict.fromkeys(cuts, 0)
    for ik, pkl_file in enumerate(pkl_files):
        with open(pkl_file, "rb") as f:
            metadata = pkl.load(f)
            
        cutflows = metadata[sample][year]["cutflows"][ch]

        for key in evyield.keys():

            if key == "sumgenweight":
                evyield[key] += metadata[sample][year][key] * xsec_weight
            else:
                evyield[key] += cutflows[key] * xsec_weight        
    return evyield

In [6]:
! ls ../eos/July18_hww_2016APV/WJetsToLNu_HT-100To200/outfiles/0-10.pkl

../eos/July18_hww_2016APV/WJetsToLNu_HT-100To200/outfiles/0-10.pkl


In [7]:
with open("../eos/July18_hww_2016APV/WJetsToLNu_HT-100To200/outfiles/0-10.pkl", "rb") as f:
    metadata = pkl.load(f)
metadata

{'WJetsToLNu_HT-100To200': {'mc': 149,
  '2016APV': {'sumgenweight': 1492951.0,
   'sumlheweight': {0: 1551137.2,
    1: 1551799.4,
    2: 1541534.8,
    3: 1491997.6,
    4: 1492951.0,
    5: 1483308.9,
    6: 1443469.5,
    7: 1444666.2,
    8: 1435536.2},
   'sumpdfweight': {0: 1492951.0,
    1: 1494297.1,
    2: 1492237.6,
    3: 1494463.2,
    4: 1493804.0,
    5: 1494341.9,
    6: 1493538.8,
    7: 1494321.6,
    8: 1493813.9,
    9: 1493034.2,
    10: 1492469.5,
    11: 1491638.0,
    12: 1493345.8,
    13: 1493301.8,
    14: 1492837.2,
    15: 1493383.9,
    16: 1492520.0,
    17: 1492682.9,
    18: 1492891.5,
    19: 1492741.8,
    20: 1493447.0,
    21: 1493083.9,
    22: 1493360.1,
    23: 1492864.5,
    24: 1493688.5,
    25: 1493356.0,
    26: 1493390.6,
    27: 1493141.5,
    28: 1493313.1,
    29: 1494101.6,
    30: 1493643.0,
    31: 1494384.6,
    32: 1493172.6,
    33: 1494015.9,
    34: 1493571.5,
    35: 1492720.9,
    36: 1492159.4,
    37: 1493126.9,
    38: 14935

# Adding a cut from the parquets

In [9]:
### This is your configuration. specefy which channels, years, samples, and directory of pkl files to use.
channels = [
    "ele", 
    "mu",
]
years = [
#     "2018", 
#     "2017",
    "2016", 
    "2016APV",
]

samples = [
    "ggF", 
    "VBF",  
    "WH",
    "ZH",    
    "ttH",
    "WJetsLNu",
    "TTbar",
    "SingleTop",
    "Diboson",
    "EWKvjets",
    "DYJets",
    "WZQQ",
    "Data",
]

samples_dir = {
    "2016":    "../eos/July18_hww_2016",
    "2016APV": "../eos/July18_hww_2016APV",    
    "2017":    "../eos/July18_hww_2017",    
    "2018":    "../eos/July18_hww_2018",    
}

In [10]:
cutflows = {}
for year in years:
#     if year != "2016":
#         continue
    print(f"Processing year {year}")
    
    cutflows[year] = {}
    
    for ch in channels:
#         if ch != "mu": 
#             continue
        
        print(f"  {ch} channel")
        cutflows[year][ch] = {}

        condor_dir = os.listdir(samples_dir[year])

        for sample in condor_dir:

            # first: check if the sample is in one of combine_samples_by_name
            sample_to_use = None
            for key in utils.combine_samples_by_name:
                if key in sample:
                    sample_to_use = utils.combine_samples_by_name[key]
                    break

            # second: if not, combine under common label
            if sample_to_use is None:
                for key in utils.combine_samples:
                    if key in sample:
                        sample_to_use = utils.combine_samples[key]
                        break
                    else:
                        sample_to_use = sample

            if sample_to_use not in samples:
                continue

            is_data = False
            if sample_to_use == "Data":
                is_data = True

            out_files = f"{samples_dir[year]}/{sample}/outfiles/"
            pkl_files = glob.glob(f"{out_files}/*.pkl")

            if len(pkl_files) == 0:
                continue

            parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")
            
            try:
                data = pd.read_parquet(parquet_files)
            except pyarrow.lib.ArrowInvalid:
                # empty parquet because no event passed selection
#                 print(f"No parquet file for {sample}")
                continue

            if len(data) == 0:
#                 print(f"Hi, No parquet file for {sample}")
                continue
    
            if sample_to_use not in cutflows[year][ch].keys():
                cutflows[year][ch][sample_to_use] = get_cutflow(pkl_files, year, ch, sample, is_data)
            else:
                temp = get_cutflow(pkl_files, year, ch, sample, is_data)
                for key in cutflows[year][ch][sample_to_use]:
                    cutflows[year][ch][sample_to_use][key] += temp[key]
            
    print(f"------------------------------------------")

Processing year 2016
  ele channel
  mu channel
------------------------------------------
Processing year 2016APV
  ele channel
  mu channel
------------------------------------------


In [11]:
samples = cutflows["2017"]["ele"].keys()  # samples
samples

KeyError: '2017'

In [12]:
from make_stacked_hists import make_events_dict

presel = {
        "mu": {
            "fj_mass": "fj_mass>40",
            "THWW>0.50": "THWW>0.750",
        },
        "ele": {
            "fj_mass": "fj_mass>40",
            "THWW>0.50": "THWW>0.750",
        },
}

THWW_path = "../../weaver-core-dev/experiments_finetuning/v35_30/model.onnx"

events_dict = make_events_dict(years, channels, samples_dir, samples, presel, THWW_path)

INFO:root:Finding VBFHToWWToAny_M-125_TuneCP5_withDipoleRecoil samples and should combine them under VBF
INFO:root:Applying fj_mass selection on 822 events
INFO:root:Applying THWW>0.50 selection on 713 events
INFO:root:Will fill the VBF dataframe with the remaining 263 events
INFO:root:tot event weight 2.0417761938861987 

INFO:root:Finding fake_2016_ele_EWK_SF_Down.parquet samples and should combine them under EWKvjets
INFO:root:No parquet file for fake_2016_ele_EWK_SF_Down.parquet
INFO:root:Finding WJetsToLNu_HT-100To200 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 19 events
INFO:root:Applying THWW>0.50 selection on 12 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 0 events
INFO:root:tot event weight 0.0 

INFO:root:Finding SingleMuon_Run2016H samples and should combine them under Data
INFO:root:Finding SingleMuon_Run2016F samples and should combine them under Data
INFO:root:Finding EWKWminus_WToLNu samples and should co

INFO:root:Finding DYJetsToLL_LHEFilterPtZ-250To400 samples and should combine them under DYJets
INFO:root:Applying fj_mass selection on 369409 events
INFO:root:Applying THWW>0.50 selection on 336498 events
INFO:root:Will fill the DYJets dataframe with the remaining 743 events
INFO:root:tot event weight 3.6241356562573 

INFO:root:Finding ST_t-channel_antitop_4f_InclusiveDecays samples and should combine them under SingleTop
INFO:root:Applying fj_mass selection on 3809 events
INFO:root:Applying THWW>0.50 selection on 3674 events
INFO:root:Will fill the SingleTop dataframe with the remaining 15 events
INFO:root:tot event weight 0.5326805790932454 

INFO:root:Finding TTTo2L2Nu samples and should combine them under TTbar
INFO:root:Applying fj_mass selection on 65972 events
INFO:root:Applying THWW>0.50 selection on 62799 events
INFO:root:Will fill the TTbar dataframe with the remaining 415 events
INFO:root:tot event weight 10.772500595026177 

INFO:root:Finding EWKZ_ZToQQ samples and should

INFO:root:tot event weight 9.673381342752961 

INFO:root:Finding EWKZ_ZToNuNu samples and should combine them under EWKvjets
INFO:root:Finding SingleMuon_Run2016G samples and should combine them under Data
INFO:root:Applying fj_mass selection on 22084 events
INFO:root:Applying THWW>0.50 selection on 18390 events
INFO:root:Will fill the Data dataframe with the remaining 347 events
INFO:root:tot event weight 347.0 

INFO:root:Finding HWminusJ_HToWW_M-125 samples and should combine them under WH
INFO:root:Applying fj_mass selection on 1575 events
INFO:root:Applying THWW>0.50 selection on 1310 events
INFO:root:Will fill the WH dataframe with the remaining 461 events
INFO:root:tot event weight 0.4202568740264877 

INFO:root:Finding WJetsToLNu_HT-800To1200 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 62782 events
INFO:root:Applying THWW>0.50 selection on 57823 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 1573 events
INFO:root:

INFO:root:Applying fj_mass selection on 38 events
INFO:root:Applying THWW>0.50 selection on 31 events
INFO:root:Will fill the Diboson dataframe with the remaining 2 events
INFO:root:tot event weight 0.3946202693489349 

INFO:root:Finding TTToHadronic samples and should combine them under TTbar
INFO:root:Applying fj_mass selection on 3281 events
INFO:root:Applying THWW>0.50 selection on 3119 events
INFO:root:Will fill the TTbar dataframe with the remaining 16 events
INFO:root:tot event weight 0.8413151772370631 

INFO:root:Finding WJetsToQQ_HT-800toInf samples and should combine them under WZQQ
INFO:root:Applying fj_mass selection on 575 events
INFO:root:Applying THWW>0.50 selection on 487 events
INFO:root:Will fill the WZQQ dataframe with the remaining 8 events
INFO:root:tot event weight 0.745691382742618 

INFO:root:Finding ZJetsToQQ_HT-600to800 samples and should combine them under WZQQ
INFO:root:Applying fj_mass selection on 555 events
INFO:root:Applying THWW>0.50 selection on 490 e

INFO:root:tot event weight 9.61100917388433 

INFO:root:Finding EWKZ_ZToLL samples and should combine them under EWKvjets
INFO:root:Applying fj_mass selection on 497 events
INFO:root:Applying THWW>0.50 selection on 457 events
INFO:root:Will fill the EWKvjets dataframe with the remaining 4 events
INFO:root:tot event weight 0.9098272827922604 

INFO:root:Finding WJetsToLNu_HT-200To400 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 5558 events
INFO:root:Applying THWW>0.50 selection on 3400 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 58 events
INFO:root:tot event weight 28.43859513520682 

INFO:root:Finding ST_tW_top_5f_inclusiveDecays samples and should combine them under SingleTop
INFO:root:Applying fj_mass selection on 1520 events
INFO:root:Applying THWW>0.50 selection on 1348 events
INFO:root:Will fill the SingleTop dataframe with the remaining 29 events
INFO:root:tot event weight 7.182683689263468 

INFO:root:Finding Glu

INFO:root:Finding SingleMuon_Run2016B_ver2_HIPM samples and should combine them under Data
INFO:root:Finding SingleMuon_Run2016E_HIPM samples and should combine them under Data
INFO:root:Finding EWKWminus_WToQQ samples and should combine them under EWKvjets
INFO:root:Applying fj_mass selection on 32 events
INFO:root:Applying THWW>0.50 selection on 22 events
INFO:root:Will fill the EWKvjets dataframe with the remaining 0 events
INFO:root:tot event weight 0.0 

INFO:root:Finding WJetsToLNu_HT-600To800 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 32368 events
INFO:root:Applying THWW>0.50 selection on 30132 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 750 events
INFO:root:tot event weight 96.95922959380687 

INFO:root:Finding SingleElectron_Run2016C_HIPM samples and should combine them under Data
INFO:root:Applying fj_mass selection on 5696 events
INFO:root:Applying THWW>0.50 selection on 4953 events
INFO:root:Will fill the 

INFO:root:Finding WJetsToLNu_HT-200To400 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 6299 events
INFO:root:Applying THWW>0.50 selection on 3804 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 80 events
INFO:root:tot event weight 40.150735199354344 

INFO:root:Finding ST_tW_top_5f_inclusiveDecays samples and should combine them under SingleTop
INFO:root:Applying fj_mass selection on 1735 events
INFO:root:Applying THWW>0.50 selection on 1540 events
INFO:root:Will fill the SingleTop dataframe with the remaining 43 events
INFO:root:tot event weight 10.89618176734101 

INFO:root:Finding GluGluHToWW_Pt-200ToInf_M-125 samples and should combine them under ggF
INFO:root:Applying fj_mass selection on 3790 events
INFO:root:Applying THWW>0.50 selection on 3105 events
INFO:root:Will fill the ggF dataframe with the remaining 1292 events
INFO:root:tot event weight 8.745904766334407 

INFO:root:Finding DYJetsToLL_LHEFilterPtZ-650ToInf sa

INFO:root:tot event weight 176.0 

INFO:root:Finding EWKWminus_WToQQ samples and should combine them under EWKvjets
INFO:root:Applying fj_mass selection on 61 events
INFO:root:Applying THWW>0.50 selection on 47 events
INFO:root:Will fill the EWKvjets dataframe with the remaining 0 events
INFO:root:tot event weight 0.0 

INFO:root:Finding WJetsToLNu_HT-600To800 samples and should combine them under WJetsLNu
INFO:root:Applying fj_mass selection on 47533 events
INFO:root:Applying THWW>0.50 selection on 43214 events
INFO:root:Will fill the WJetsLNu dataframe with the remaining 1390 events
INFO:root:tot event weight 178.4888230535879 

INFO:root:Finding SingleElectron_Run2016C_HIPM samples and should combine them under Data
INFO:root:Finding WJetsToQQ_HT-600to800 samples and should combine them under WZQQ
INFO:root:Applying fj_mass selection on 386 events
INFO:root:Applying THWW>0.50 selection on 321 events
INFO:root:Will fill the WZQQ dataframe with the remaining 12 events
INFO:root:tot ev

# Add the cut to the cutflow dict

In [13]:
presel = {
        "mu": {
            "fj_mass": "fj_mass>40",
            "THWW>0.75": "fj_mass>40 & THWW>0.75",
        },
        "ele": {
            "fj_mass": "fj_mass>40",            
            "THWW>0.75": "fj_mass>40 & THWW>0.75",
        },
}

In [14]:
for ch in channels:
    for cut, sel in list(presel[ch].items()):
        for sample in samples:
            for year in years:

                df = events_dict[year][ch][sample]
                df = df.query(sel)
                
                w = df["nominal"]

                cutflows[year][ch][sample][cut] = w.sum()

In [15]:
cutflows["2016APV"]["mu"]["WJetsLNu"]     # take a quick look

{'sumgenweight': 32148205.541996155,
 'Trigger': 5052102.989037976,
 'METFilters': 5049012.531204127,
 'OneLep': 5031057.850622146,
 'NoTaus': 4271447.470579825,
 'AtLeastOneFatJet': 461978.1002925532,
 'CandidateJetpT': 196124.03115179407,
 'LepInJet': 68289.06508691012,
 'JetLepOverlap': 26003.119346261694,
 'dPhiJetMET': 18333.833322064966,
 'MET': 16980.930607238028,
 'fj_mass': 560.3813816501639,
 'THWW>0.75': 560.3813816501639}

In [16]:
cutflows["2016APV"]["ele"]["WJetsLNu"]     # take a quick look

{'sumgenweight': 32241758.10101272,
 'Trigger': 4454643.38131529,
 'METFilters': 4451955.991847101,
 'OneLep': 3062734.5054372367,
 'NoTaus': 3062734.5054372367,
 'AtLeastOneFatJet': 364780.748501086,
 'CandidateJetpT': 153895.86376889877,
 'LepInJet': 58938.59730123174,
 'JetLepOverlap': 19206.882023214886,
 'dPhiJetMET': 13362.535289163337,
 'MET': 12293.663142455729,
 'fj_mass': 323.25912091618096,
 'THWW>0.75': 323.25912091618096}

# Combine different channels

In [17]:
common_cuts = cutflows["2018"]["mu"]["WJetsLNu"]
common_cuts

KeyError: '2018'

In [18]:
def combine_channels(cutflows):

    # combine both channels
    cutflows_new = {}
    for year in cutflows.keys():
        cutflows_new[year] = {}
        cutflows_new[year]["lep"] = {}
        
        for ch in ["mu", "ele"]:
            for sample in cutflows[year][ch]:
                                
                if sample not in cutflows_new[year]["lep"]:
                    cutflows_new[year]["lep"][sample] = {}
                
                for cut in cutflows[year][ch][sample]:
                    
                    if (year != "2018") and (cut == "HEMCleaning"):
                        continue
                    
                    if cut not in cutflows_new[year]["lep"][sample]:
                        cutflows_new[year]["lep"][sample][cut] = cutflows[year][ch][sample][cut]
                    else:
                        cutflows_new[year]["lep"][sample][cut] += cutflows[year][ch][sample][cut]
        cutflows[year] = {**cutflows[year], **cutflows_new[year]}
        
    return cutflows

In [19]:
cutflows = combine_channels(cutflows)

In [20]:
cutflows["2018"].keys()

KeyError: '2018'

In [21]:
cutflows["2018"]["ele"]["WJetsLNu"]

KeyError: '2018'

In [22]:
cutflows["2018"]["mu"]["WJetsLNu"]

KeyError: '2018'

In [23]:
cutflows["2018"]["lep"]["WJetsLNu"]

KeyError: '2018'

# Combine different years

In [24]:
def combine_years(cutflows):
    """Will remove the HEM cleaning cutflow from 2018 first."""
    
    whatever_year = list(cutflows.keys())[0]
    channels = cutflows[whatever_year].keys()
    
    # combine all years
    cutflows_new = {}
    cutflows_new["Run2"] = {}
    
    for ch in channels:
        cutflows_new["Run2"][ch] = {}
        
        for year in cutflows:
            for sample in cutflows[year][ch]:
                
                if sample not in cutflows_new["Run2"][ch]:
                    cutflows_new["Run2"][ch][sample] = {}

                for cut in cutflows[year][ch][sample]:
                    if "HEM" in cut:
                        continue
                    if cut not in cutflows_new["Run2"][ch][sample]:
                        cutflows_new["Run2"][ch][sample][cut] = cutflows[year][ch][sample][cut]
                    else:
                        cutflows_new["Run2"][ch][sample][cut] += cutflows[year][ch][sample][cut]

    cutflows = {**cutflows, **cutflows_new}

    return cutflows

In [25]:
cutflows = combine_years(cutflows)

In [26]:
cutflows["2016"]["ele"].keys()

dict_keys(['VBF', 'WJetsLNu', 'EWKvjets', 'WH', 'TTbar', 'Data', 'SingleTop', 'ggF', 'DYJets', 'Diboson', 'WZQQ', 'ttH', 'ZH'])

In [27]:
cutflows.keys()

dict_keys(['2016', '2016APV', 'Run2'])

In [28]:
cutflows["Run2"].keys()

dict_keys(['ele', 'mu', 'lep'])

# Combine non-dominant backgrounds

In [29]:
# combine non-dominant backgrounds under others
dominant_bkgs = ["WJetsLNu", "TTbar"]
signals = ["ggF", "VH", "WH", "ZH", "ttH"]

for year in cutflows:
    for ch in cutflows[year]:
        cutflows[year][ch]["Others"] = dict.fromkeys(cutflows[year][ch]["WJetsLNu"], 0)
        for sample in cutflows[year][ch]:
            if sample == "Data":
                continue
            if sample not in signals+dominant_bkgs:
                for cut in cutflows[year][ch][sample]:
                    cutflows[year][ch]["Others"][cut] += cutflows[year][ch][sample][cut]

In [30]:
cutflows["2018"]["ele"].keys()

KeyError: '2018'

In [31]:
cutflows["2018"]["lep"]["Others"]

KeyError: '2018'

# LateX cutflow table

In [32]:
cuts = [
    "sumgenweight",
    "Trigger",
    "METFilters",
    "OneLep",        
    "NoTaus",
    "AtLeastOneFatJet",
    "CandidateJetpT",
    "LepInJet",
    "JetLepOverlap",
    "dPhiJetMET",
    "MET",
    "HEMCleaning",
]

for cut in presel["mu"]:
    cuts += [cut]

In [33]:
cut_to_label = {
    "sumgenweight": "sumgenweight",        
    "HEMCleaning": "HEMCleaning",    
    "Trigger": "Trigger",
    "METFilters": "METFilters",
    "OneLep": "n Leptons = 1",
    "NoTaus": "n Taus = 0",
    "AtLeastOneFatJet": r"n FatJets $>=$ 1",
    "CandidateJetpT": r"j $p_T > 250$GeV",
    "LepInJet": r"$\Delta R(j, \ell) < 0.8$",
    "JetLepOverlap": r"$\Delta R(j, \ell) > 0.03$",
    "dPhiJetMET": r"$\Delta \phi(\mathrm{MET}, j)<1.57$",
    "MET": r"$\mathrm{MET}>20$",
    
    "None": "None",

    "fj_mass": r"j $\mathrm{softdrop} > 40$GeV",
    
    "THWW>0.75": r"$\ensuremath{T_{\text{HWW}}^{\ell\nu qq}} > 0.75$",
} 


In [34]:
parquet_to_latex = {
    "WJetsLNu": "$\PW(\Pell\PGn)$+",
    "TTbar": "\\ttbar",
    "Others": "Other MC",

    "ggF": "ggF",
    "VBF": "VBF",
    "WH": "WH",
    "ZH": "ZH",    
    "ttH": "$t\\bar{t}H$",    
    
    "Data": "Data",
}

def make_latex_cutflow_table(cutflows_dict, year, ch, add_data=False, add_sumgenweight=False):
    """Will use the cutflows dictionary to make the LateX table we have in the AN."""
    
    samples_bkg = ["WJetsLNu", "TTbar", "Others"]
    samples_sig = ["ggF","VBF", "WH", "ZH", "ttH"]

    ### backgrounds
    headers = [parquet_to_latex[s] for s in samples_bkg]
    
    textabular = f"l{'r'*len(headers)}"
    textabular += "|r"
    
    texheader = "\\textbf{Inclusive Selection}" + " & " + " & ".join(headers) + " & Total MC "
    if add_data:
        textabular += "|r"
        texheader += "& Data "
    texheader += "\\\\"
    texdata = "\\hline\n"
    
    data = dict()
    
    for cut in cuts: 
        if (year != "2018") and (cut == "HEMCleaning"):
            continue
            
        if not add_sumgenweight and cut == "sumgenweight":
            continue
    
        data[cut] = []

        for sample in samples_bkg:            
            data[cut].append(round(cutflows_dict[year][ch][sample][cut]))
            
        totalmc = 0
        for sample in (samples_bkg + samples_sig):
            totalmc += round(cutflows_dict[year][ch][sample][cut])
            
        data[cut].append(totalmc)
        
        if add_data:
            data[cut].append(round(cutflows_dict[year][ch]["Data"][cut]))

    for label in data:
        if label == "z":
            texdata += "\\hline\n"
        texdata += f"{cut_to_label[label]} & {' & '.join(map(str,data[label]))} \\\\\n"
        
    texdata += "\\hline\n"    

    ### signal
    headers2 = [parquet_to_latex[s] for s in samples_sig]
    texheader2 = " & " + " & ".join(headers2) + "\\\\"
    texdata2 = "\\hline\n"

    textabular2 = f"l{'r'*len(headers2)}"
    
    data = dict()
    for cut in cuts:
        if (year != "2018") and (cut == "HEMCleaning"):
            continue
            
        data[cut] = []

        for sample in samples_sig:
            data[cut].append(round(cutflows_dict[year][ch][sample][cut]))
        
    for label in data:
        if label == "z":
            texdata += "\\hline\n"
        texdata2 += f"{cut_to_label[label]} & {' & '.join(map(str,data[label]))} \\\\\n"    

    # make table
    print("\\begin{table}[!htp]")
    print("\\begin{center}")
    
    print("\\begin{tabular}{"+textabular+"}")
    print(texheader)
    print(texdata,end="")
    print("\\end{tabular}")

    print("\\begin{tabular}{"+textabular2+"}")
    print(texheader2)
    print(texdata2,end="")
    print("\\end{tabular}")
    
    
    if ch == "lep":
        print("\\caption{Event yield of " + year + " Monte Carlo samples normalized to " + str(round(get_lumi([year], [ch]))) + "\\fbinv.}")        
    else:
        print("\\caption{Event yield of " + ch + " channel " + year + " Monte Carlo samples normalized to " + str(round(get_lumi([year], [ch]))) + "\\fbinv.}")
        
    print("\\label{sel-tab-cutflow" + year + "}")
    print("\\end{center}")
    print("\\end{table}")    

In [37]:
make_latex_cutflow_table(cutflows, "2016", "lep", add_data=True, add_sumgenweight=True)

\begin{table}[!htp]
\begin{center}
\begin{tabular}{lrrr|r|r}
\textbf{Inclusive Selection} & $\PW(\Pell\PGn)$+ & \ttbar & Other MC & Total MC & Data \\
\hline
sumgenweight & 34495755 & 27964378 & 206072910 & 268588181 & 615115660 \\
Trigger & 5073520 & 3014113 & 30758671 & 38851228 & 340441292 \\
METFilters & 5070260 & 3011980 & 30743881 & 38831040 & 340346213 \\
n Leptons = 1 & 3898866 & 2429647 & 19258350 & 25590602 & 174430987 \\
n Taus = 0 & 3737633 & 1940154 & 17943360 & 23624147 & 168036783 \\
n FatJets $>=$ 1 & 700000 & 355885 & 497342 & 1553908 & 1928098 \\
j $p_T > 250$GeV & 317487 & 177748 & 223761 & 719364 & 765173 \\
$\Delta R(j, \ell) < 0.8$ & 113857 & 77714 & 107066 & 298829 & 382488 \\
$\Delta R(j, \ell) > 0.03$ & 41009 & 67736 & 42216 & 151134 & 175116 \\
$\Delta \phi(\mathrm{MET}, j)<1.57$ & 28841 & 43044 & 25441 & 97444 & 97479 \\
$\mathrm{MET}>20$ & 26663 & 40342 & 21242 & 88355 & 87106 \\
j $\mathrm{softdrop} > 40$GeV & 784 & 356 & 284 & 1448 & 1282 \\
$\ensuremath{T