In [17]:
import utils
import plotting
import postprocessing
import numpy as np
import warnings
import pandas as pd
from pandas.errors import SettingWithCopyWarning
from sample_labels import samples, sig_key, qcd_key, data_key
import os
from utils import CUT_MAX_VAL

# ignore these because they don't seem to apply
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

from PyPDF2 import PdfFileMerger

import pickle

from copy import deepcopy

import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker

# Apply the CMS plotting style from mplhep. The style is requested twice (once through
# matplotlib, once through mplhep); NOTE(review): these look redundant - confirm before removing one.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Shared tick formatter: math-text rendering, with power limits of (-3, 3) for scientific notation.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Default font size for every figure produced in this notebook.
plt.rcParams["font.size"] = 16

In [2]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported modules
# (e.g. the local `utils`, `plotting`, `postprocessing`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Base directory, given relative to this notebook's location.
MAIN_DIR = "../../../"

# Data-taking year processed in this notebook.
year = "2017"

# Skimmer outputs: signal samples are read from a different production tag ("Feb3")
# than all the other samples ("Jan31").
samples_dir, signal_samples_dir = (
    f"{MAIN_DIR}/../data/skimmer/{tag}" for tag in ("Jan31", "Feb3")
)

# Output locations are currently disabled; kept for reference.
# plot_dir = "../../../plots/PostProcessing/Feb15"
# templates_dir = "templates/Jan31/"
# _ = os.system(f"mkdir -p {plot_dir}")
# _ = os.system(f"mkdir -p {templates_dir}")

In [4]:
# Loading filters: both jets' Msd >= 50 and at least one jet with Txbb >= 0.8.
# Each inner list is one alternative made of (column, operator, value) cuts; the two
# alternatives differ only in which jet carries the Txbb requirement.
msd_cuts = [(f"('ak8FatJetMsd', '{jet}')", ">=", 50) for jet in ("0", "1")]
filters = [
    [*msd_cuts, (f"('ak8FatJetParticleNetMD_Txbb', '{jet}')", ">=", 0.8)]
    for jet in ("0", "1")
]

# Thresholds shared between the regions below.
bdt_min = 0.986
txbb_pass_min = 0.976
txbb_fail_min = 0.8

# Region definitions, laid out as {label: {cutvar: [min, max], ...}, ...}.
# CUT_MAX_VAL (from utils) is used as the upper bound wherever a cut has no upper limit set here.
selection_regions = {
    # BDT score and bb-jet Txbb both above their thresholds
    "passCat1": {
        "BDTScore": [bdt_min, CUT_MAX_VAL],
        "bbFatJetParticleNetMD_Txbb": [txbb_pass_min, CUT_MAX_VAL],
    },
    # BDT score requirement only
    "BDTOnly": {
        "BDTScore": [bdt_min, CUT_MAX_VAL],
    },
    # bb-jet Txbb between the fail minimum and the pass threshold
    "fail": {
        "bbFatJetParticleNetMD_Txbb": [txbb_fail_min, txbb_pass_min],
    },
}

# The bb-jet soft-drop mass is the final shape variable: (column name, axis label).
shape_var = "bbFatJetMsd", r"$m^{bb}$ (GeV)"

# Histogram binning for the shape variable, ordered as [number of bins, min, max].
shape_bins = [20, 50, 250]

# [low, high] edges of the blinded window; presumably in the same units as the shape variable.
blind_window = [100, 150]

Load samples

In [5]:
# Cutflow table with one row per entry in `samples`; columns are added by utils.add_to_cutflow.
cutflow = pd.DataFrame(index=list(samples.keys()))

# The signal sample is read from its own skimmer directory, everything else from `samples_dir`.
events_dict = utils.load_samples(signal_samples_dir, {sig_key: samples[sig_key]}, year, filters)

# Pick the non-signal samples with an order-preserving comprehension. The set difference
# `samples.keys() - [sig_key]` returns a set, whose iteration order depends on string hashing
# and can therefore change between kernel restarts, reshuffling the samples in `events_dict`.
other_samples = {k: samples[k] for k in samples.keys() if k != sig_key}
events_dict |= utils.load_samples(samples_dir, other_samples, year, filters)

# Record the yields after loading under the "BDTPreselection" label, using the "weight" column.
utils.add_to_cutflow(events_dict, "BDTPreselection", "weight", cutflow)

# print weighted sample yields
for sample in events_dict:
    tot_weight = np.sum(events_dict[sample]["weight"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Pre-selection HHbbVV yield: 3.93
Pre-selection QCD yield: 3225080.38
Pre-selection W+Jets yield: 22355.24
Pre-selection Data yield: 869653.00
Pre-selection TT yield: 219865.36


Load Trigger SFs

In [8]:
# Trigger-efficiency inputs per year; each entry is indexed with "num" and "den" further below.
combined = {}
years = ["2017"]

# Use a dedicated loop variable so that the notebook-level `year` setting (used by the
# sample loading and by later cells) is not silently overwritten by this loop.
for trig_year in years:
    # NOTE(review): pickle.load can execute arbitrary code while unpickling - only point this
    # at trusted, locally produced files.
    with open(
        f"../corrections/trigEffs/{trig_year}_combined.pkl", "rb"
    ) as filehandler:
        combined[trig_year] = pickle.load(filehandler)

In [12]:
def _efficiency_by_year(selection):
    """Return {year: num[selection] / den[selection]} for every entry of `years`.

    `selection` is the index tuple applied identically to the numerator and the denominator
    histograms; a `sum` entry sums over that axis before the ratio is taken.
    """
    return {
        year: combined[year]["num"][selection] / combined[year]["den"][selection]
        for year in years
    }


_every_bin = slice(None)

# Full 4D efficiency (no axis summed over)
effs = _efficiency_by_year((_every_bin, _every_bin, _every_bin, _every_bin))
# Axis 1 summed over (leaves the Txbb, pT and mSD axes, per the histogram repr shown below)
effs_txbb = _efficiency_by_year((_every_bin, sum, _every_bin, _every_bin))
# Axis 0 summed over
effs_th4q = _efficiency_by_year((sum, _every_bin, _every_bin, _every_bin))

In [23]:
# Display the efficiency histogram with axis 1 summed over, for the notebook-level `year`.
effs_txbb[year]

Hist(
  Variable([0, 0.9, 0.95, 0.98, 1], name='jet1txbb', label='$T_{Xbb}$ Score'),
  Regular(50, 0, 1000, name='jet1pt', label='$p_T$ (GeV)'),
  Regular(15, 0, 300, name='jet1msd', label='$m_{SD}$ (GeV)'),
  storage=Double()) # Sum: nan (nan with flow)

In [18]:
from hist.intervals import clopper_pearson_interval
from coffea.lookup_tools.dense_lookup import dense_lookup

# Per-year stores: `*errors*` hold half the distance between the two interval bounds,
# `*intervals*` hold the array returned by clopper_pearson_interval.
trig_errors, trig_intervals = {}, {}
errors_txbb, intervals_txbb = {}, {}
errors_th4q, intervals_th4q = {}, {}

for year in years:
    num_hist = combined[year]["num"]
    den_hist = combined[year]["den"]

    # (numerator, denominator, error store, interval store) for the full histogram and for
    # the two projections with axis 1 / axis 0 summed over.
    interval_jobs = (
        (num_hist, den_hist, trig_errors, trig_intervals),
        (num_hist[:, sum, :, :], den_hist[:, sum, :, :], errors_txbb, intervals_txbb),
        (num_hist[sum, :, :, :], den_hist[sum, :, :, :], errors_th4q, intervals_th4q),
    )

    for num_proj, den_proj, error_store, interval_store in interval_jobs:
        # Bin contents without the under/overflow bins
        intervals = clopper_pearson_interval(
            num_proj.view(flow=False), den_proj.view(flow=False)
        )
        error_store[year] = (intervals[1] - intervals[0]) / 2
        interval_store[year] = intervals

In [22]:
# Check the layout of the stored 4D intervals: a leading axis of length 2 (the two interval
# bounds) followed by the four histogram axes.
trig_intervals[year].shape

(2, 4, 4, 50, 15)

In [24]:
# Per-jet trigger-efficiency lookup built from the full 4D efficiency histogram.
# The NaN fill value must be passed by keyword: the second positional parameter of
# np.nan_to_num is `copy`, so a positional `0` means copy=False and overwrites the NaN
# bins of the `effs[year]` histogram storage in place.
ak8TrigEffsLookup = dense_lookup(
    np.nan_to_num(effs[year].view(flow=False), nan=0.0), np.squeeze(effs[year].axes.edges)
)

# Column that receives the event weight including the 4D trigger efficiency.
weight_key = "weight_4d"

for sample in events_dict:
    events = events_dict[sample]
    if sample == data_key:
        # Data is left unchanged: no trigger efficiency is applied.
        events[weight_key] = events["weight"]
    else:
        # Efficiency for each fat jet, looked up in (Txbb, Th4q, pT, mSD).
        # Presumably each column holds one entry per fat jet - the product below runs over axis 1.
        fj_trigeffs = ak8TrigEffsLookup(
            events["ak8FatJetParticleNetMD_Txbb"].values,
            events["ak8FatJetParticleNet_Th4q"].values,
            events["ak8FatJetPt"].values,
            events["ak8FatJetMsd"].values,
        )
        # combined eff = 1 - (1 - fj1_eff) * (1 - fj2_eff)
        combined_trigEffs = 1 - np.prod(1 - fj_trigeffs, axis=1, keepdims=True)
        events[weight_key] = events["weight"] * combined_trigEffs


In [25]:
# Per-jet trigger-efficiency lookup built from the 3D efficiency histogram (`effs_txbb`).
# The NaN fill value must be passed by keyword: the second positional parameter of
# np.nan_to_num is `copy`, so a positional `0` means copy=False and overwrites the NaN
# bins of the `effs_txbb[year]` histogram storage in place.
ak8TrigEffsLookup = dense_lookup(
    np.nan_to_num(effs_txbb[year].view(flow=False), nan=0.0),
    np.squeeze(effs_txbb[year].axes.edges),
)

# Column that receives the event weight including the 3D trigger efficiency.
weight_key = "weight_3d"

for sample in events_dict:
    events = events_dict[sample]
    if sample == data_key:
        # Data is left unchanged: no trigger efficiency is applied.
        events[weight_key] = events["weight"]
    else:
        # Efficiency for each fat jet, looked up in (Txbb, pT, mSD).
        # Presumably each column holds one entry per fat jet - the product below runs over axis 1.
        fj_trigeffs = ak8TrigEffsLookup(
            events["ak8FatJetParticleNetMD_Txbb"].values,
            events["ak8FatJetPt"].values,
            events["ak8FatJetMsd"].values,
        )
        # combined eff = 1 - (1 - fj1_eff) * (1 - fj2_eff)
        combined_trigEffs = 1 - np.prod(1 - fj_trigeffs, axis=1, keepdims=True)
        events[weight_key] = events["weight"] * combined_trigEffs


In [26]:
# print weighted sample yields
for sample in events_dict:
    tot_weight = np.sum(events_dict[sample]["weight_4d"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Pre-selection HHbbVV yield: 1.58
Pre-selection QCD yield: 886995.23
Pre-selection W+Jets yield: 8973.01
Pre-selection Data yield: 869653.00
Pre-selection TT yield: 97296.24


In [27]:
# print weighted sample yields
for sample in events_dict:
    tot_weight = np.sum(events_dict[sample]["weight_3d"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Pre-selection HHbbVV yield: 1.56
Pre-selection QCD yield: 887113.86
Pre-selection W+Jets yield: 8972.77
Pre-selection Data yield: 869653.00
Pre-selection TT yield: 97386.16
