# Make...
- Loads an output pkl file produced by the June 25th iteration of `trigger_efficiencies_processor.py`, which does not apply the dxy and dz requirements to the muons but instead saves them as the booleans `muon_dxy` and `muon_dz`.
- Checks the effect of the dxy and dz requirements on the trigger efficiency plots.

In [1]:
import glob
import json
import os
import pickle
import pickle as pkl

import hist as hist2
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import yaml
from hist.intervals import clopper_pearson_interval
from scipy.special import softmax
from sklearn.metrics import auc, roc_curve

import matplotlib.pyplot as plt

import sys
sys.path
sys.path.append("../python/")
import utils
from make_stacked_hists import make_events_dict

# Disable pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None

# NOTE(review): the font size is set *before* the CMS style sheet is applied;
# if that style sheet defines its own "font.size" it may override the value
# set here — confirm the intended order.
plt.rcParams.update({"font.size": 20})
plt.style.use(hep.style.CMS)

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to local helper code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
 # define your regions here
# Pre-selection handed to make_events_dict, keyed by lepton channel.
# Both channels currently carry the same single "msoftdrop" requirement.
presel = {ch: {"msoftdrop": "fj_mass>40"} for ch in ("mu", "ele")}

years = ["2017", "2016", "2016APV", "2018"]
channels = ["ele", "mu"]

# Sample labels requested from make_events_dict.
samples = ["ggF", "VBF", "WH", "ZH", "ttH", "HTauTau"]

# Input directory for each data-taking year.
samples_dir = {
    "2016": "../eos/May31_hww_2016",
    "2016APV": "../eos/May31_hww_2016APV",
    "2017": "../eos/May31_hww_2017",
    "2018": "../eos/May31_hww_2018",
}

# Load one year at a time (each year has its own directory) and merge the
# per-year results into a single dictionary.
events_dict = {}
for year in years:
    out = make_events_dict([year], channels, samples_dir[year], samples, presel)
    events_dict.update(out)

INFO:root:Finding VBFHToWWToAny_M-125_TuneCP5_withDipoleRecoil samples and should combine them under VBF
INFO:root:Applying msoftdrop selection on 1745 events
INFO:root:Will fill the VBF dataframe with the remaining 1481 events
INFO:root:tot event weight 12.413656103948622 

INFO:root:Finding HWminusJ_HToWW_M-125 samples and should combine them under WH
INFO:root:Applying msoftdrop selection on 4084 events
INFO:root:Will fill the WH dataframe with the remaining 3431 events
INFO:root:tot event weight 2.426111906111235 

INFO:root:Finding GluGluHToWW_Pt-200ToInf_M-125 samples and should combine them under ggF
INFO:root:Applying msoftdrop selection on 4951 events
INFO:root:Will fill the ggF dataframe with the remaining 4253 events
INFO:root:tot event weight 35.44621675087096 

INFO:root:Finding HWplusJ_HToWW_M-125 samples and should combine them under WH
INFO:root:Applying msoftdrop selection on 4843 events
INFO:root:Will fill the WH dataframe with the remaining 4094 events
INFO:root:tot 

INFO:root:Finding VBFHToWWToAny_M-125_TuneCP5_withDipoleRecoil samples and should combine them under VBF
INFO:root:Applying msoftdrop selection on 488 events
INFO:root:Will fill the VBF dataframe with the remaining 414 events
INFO:root:tot event weight 4.828082732522468 

INFO:root:Finding HWminusJ_HToWW_M-125 samples and should combine them under WH
INFO:root:Applying msoftdrop selection on 1463 events
INFO:root:Will fill the WH dataframe with the remaining 1235 events
INFO:root:tot event weight 0.9452311438060224 

INFO:root:Finding GluGluHToWW_Pt-200ToInf_M-125 samples and should combine them under ggF
INFO:root:Applying msoftdrop selection on 2389 events
INFO:root:Will fill the ggF dataframe with the remaining 2028 events
INFO:root:tot event weight 13.837643586222953 

INFO:root:Finding HWplusJ_HToWW_M-125 samples and should combine them under WH
INFO:root:Applying msoftdrop selection on 1871 events
INFO:root:Will fill the WH dataframe with the remaining 1560 events
INFO:root:tot e

In [13]:
# Show which sample keys were loaded for the 2016APV electron channel.
events_dict["2016APV"]['ele'].keys()

dict_keys(['VBF', 'WH', 'ggF', 'ttH', 'ZH'])

In [19]:
def make_composition_table_sig(ev, presel, years=None, channels=None):
    """Print a LaTeX table of Higgs event yields per production mode.

    One table row is printed per entry of ``presel``. Each row holds the sum
    of ``event_weight`` over all requested years and channels, per sample,
    after applying that region's selection with ``DataFrame.query``.

    Args:
        ev: Nested mapping ``ev[year][channel][sample]`` whose leaves are
            pandas DataFrames with an ``event_weight`` column plus whatever
            columns the selection strings reference.
        presel: Mapping from the row label (used verbatim as LaTeX) to a
            pandas query string selecting the events of that region.
        years: Years to sum over. Defaults to every year present in ``ev``.
        channels: Channels to sum over. Defaults to every channel present
            in ``ev`` for the year being processed.

    Returns:
        None. The table is written to standard output.

    Year/channel/sample combinations that are missing from ``ev`` are skipped
    and contribute zero yield.
    """
    # Column order of the table: five "signal" columns, then one "background".
    signal_samples = ("VBF", "ggF", "ttH", "WH", "ZH")
    background_samples = ("HTauTau",)
    columns = signal_samples + background_samples

    selected_years = list(ev) if years is None else list(years)

    # Table preamble.
    print("\\begin{table}[!ht]")
    print("\\begin{center}")

    print("\\caption{Event yield contribution from different Higgs processes at pre-selection level and in the signal-like region, defined by a high tagger score, for the full Run2 dataset. The contribution of H(tau-tau) decays is negligible in both regions.}")

    print("\\begin{tabular}{c|ccccc|c}")

    print("& \\multicolumn{5}{c|}{Higgs Signal yields} & \\multicolumn{1}{c}{Higgs Background yields} \\\\\n")
    print("& VBF & ggF & \\ttH & WH & ZH & HTauTau \\\\\n")
    print("\\hline")
    print("\\hline")

    for region, sel in presel.items():
        yields = dict.fromkeys(columns, 0)

        for year in selected_years:
            try:
                per_year = ev[year]
            except KeyError:
                continue  # requested year was not loaded

            selected_channels = list(per_year) if channels is None else channels
            for ch in selected_channels:
                for sample in columns:
                    # Only a missing entry is skipped; any other failure
                    # (e.g. a bad query string) should surface to the user.
                    try:
                        df = per_year[ch][sample]
                    except KeyError:
                        continue

                    yields[sample] += df.query(sel)["event_weight"].sum()

        texdata = region + "".join(f" & {yields[sample]:.0f}" for sample in columns)
        texdata += " \\\\\n"
        print(texdata)
        print("\\hline")

    # Table epilogue.
    print("\\hline")
    print("\\end{tabular}")
    print("\\label{tab:sigproductionmodes}")
    print("\\end{center}")
    print("\\end{table}")

In [20]:
# Regions to tabulate: LaTeX row label -> pandas query string on the tagger score.
# NOTE(review): the author marked the first cut as a "dummy" — confirm that
# THWW>0.5 is not expected to reject events at pre-selection level.
presel = {}
presel["Pre-selection"] = "THWW>0.5"
presel[r"$T_{HWW}> 0.9$"] = "THWW>0.9"

make_composition_table_sig(events_dict, presel)

\begin{table}[!ht]
\begin{center}
\caption{Event yield contribution from different Higgs processes at pre-selection level and in the signal-like region, defined by a high tagger score, for the full Run2 dataset. The contribution of H(tau-tau) decays is negligible in both regions.}
\begin{tabular}{c|ccccc|c}
& \multicolumn{5}{c|}{Higgs Signal yields} & \multicolumn{1}{c}{Higgs Background yields} \\

& VBF & ggF & \ttH & WH & ZH & HTauTau \\

\hline
\hline
Pre-selection & 56 & 154 & 30 & 27 & 15 & 0 \\

\hline
$T_{HWW}> 0.9$ & 22 & 54 & 6 & 9 & 5 & 0 \\

\hline
\hline
\end{tabular}
\label{tab:sigproductionmodes}
\end{center}
\end{table}
