# Investigating LP Reweighting

In [5]:
import utils
import plotting
import postprocessing
import numpy as np
import warnings
import pandas as pd
from pandas.errors import SettingWithCopyWarning
from hh_vars import nonres_samples, samples, nonres_sig_keys, data_key

# Silence pandas' SettingWithCopyWarning for the whole notebook: the warnings
# raised here don't seem to apply.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

from PyPDF2 import PdfFileMerger

from copy import deepcopy

import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker

# Plot styling: the CMS style is applied through both matplotlib and mplhep.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Base font size, set after the style so it is not overridden by it.
plt.rcParams["font.size"] = 16

# Mathtext tick formatter with power limits of (-3, 3).
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

In [4]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Paths and data-taking year used throughout the notebook.
MAIN_DIR = "../../../"
signal_samples_dir = f"{MAIN_DIR}/../data/skimmer/Feb3"
year = "2017"

# Selection handed to the loader: ak8FatJetMsd >= 50 for both jets (0 and 1).
msd_min = 50
filters = [
    [
        ("('ak8FatJetMsd', '0')", ">=", msd_min),
        ("('ak8FatJetMsd', '1')", ">=", msd_min),
    ],
]

# Only the first non-resonant signal sample is loaded.
sig_key = nonres_sig_keys[0]
sig_samples = {sig_key: nonres_samples[sig_key]}
events_dict = utils.load_samples(signal_samples_dir, sig_samples, year, filters)

# Summed "weight" of the loaded events, reported as the pre-selection yield.
sig_weights = events_dict[sig_key]["weight"]
tot_weight = np.sum(sig_weights).values[0]
print(f"Pre-selection {nonres_samples[sig_key]} yield: {tot_weight:.2f}")

# Empty cutflow table with one row for the signal sample.
cutflow = pd.DataFrame(index=[sig_key])

Loaded GluGluToHHTobbVV_node_cHHH1                       : 236504 entries
Pre-selection GluGluToHHTobbVV yield: 5.12


In [7]:
# Apply the weights first (the cutflow table is passed along), then keep the
# signal sample's entry of the bb/VV assignment result and its events.
postprocessing.apply_weights(events_dict, year, cutflow)
bb_masks_by_sample = postprocessing.bb_VV_assignment(events_dict)
bb_masks, events = bb_masks_by_sample[sig_key], events_dict[sig_key]

In [10]:
# Run the LP scale-factor post-processing and store the result back in events_dict.
events = postprocessing.postprocess_lpsfs(events)
events_dict[sig_key] = events

# Keep the weight from before the LP correction, then scale "finalWeight" by
# element [0] of the nominal VV scale factor.
pre_lp_weight = events["finalWeight"]
events["finalWeight_preLP"] = pre_lp_weight
events["finalWeight"] = pre_lp_weight * events["VV_lp_sf_nom"][0]

In [11]:
# This cell repeats the weight update at the end of the previous cell. Executed
# as originally written, it would overwrite "finalWeight_preLP" with the already
# LP-corrected weight and multiply "finalWeight" by the scale factor a second
# time. The update is therefore idempotent: the pre-LP weight is captured only
# once, and "finalWeight" is always rebuilt from it.
if "finalWeight_preLP" not in events.columns:
    events["finalWeight_preLP"] = events["finalWeight"]
events["finalWeight"] = events["finalWeight_preLP"] * events["VV_lp_sf_nom"][0]

In [36]:
# Evaluate each per-event feature once and reuse it across the selections.
vv_thww = utils.get_feat(events, "VVFatJetParTMD_THWW4q", bb_masks)
vv_pt = utils.get_feat(events, "VVFatJetPt", bb_masks)
bb_txbb = utils.get_feat(events, "bbFatJetParticleNetMD_Txbb", bb_masks)

# Individual boolean selections.
pass_vv_tagger = vv_thww > 0.975
pass_vv_pt = vv_pt > 500
pass_bb_txbb = bb_txbb > postprocessing.txbb_wps[year]["HP"]

# Boolean masks are combined with `&` (element-wise AND) rather than arithmetic `*`.
VV_cut_events = events[pass_vv_tagger]
VV_pt_cut_events = events[pass_vv_tagger & pass_vv_pt]
bb_cut_events = events[pass_bb_txbb]

In [35]:
# Events whose VV fat jet pT is above 1000.
high_pt_mask = utils.get_feat(events, "VVFatJetPt", bb_masks) > 1000
ptcut = events[high_pt_mask]

# Summed "VV_lp_sf_num_sjpt_gt350" divided by the grand total of
# "ak8FatJetHVV", scaled by 0.21.
n_sj_pt_gt350 = np.sum(ptcut["VV_lp_sf_num_sjpt_gt350"])
n_hvv_total = np.sum(np.sum(ptcut["ak8FatJetHVV"]))
ptunc = n_sj_pt_gt350 / n_hvv_total * 0.21
print(f"pT extrapolation unc. for > 1TeV jets: {ptunc.values[0]:.2f}")

pT extrapolation unc. for > 1TeV jets: 0.23


In [14]:
# LP scale factor for the events passing the VV tagger cut.
vvsf, unc, uncs = postprocessing.get_lpsf(VV_cut_events)

# Individual uncertainties combined in quadrature (L2 norm).
tot_rel_unc = np.linalg.norm(list(uncs.values()))

print(f"SF: {vvsf:.2f} ± {unc:.2f}")
for source, size in uncs.items():
    print(f"{source}: {size:.2f}")
print(f"Combined: {tot_rel_unc:.2f}")

SF: 0.77 ± 0.14
syst_unc: 0.14
stat_unc: 0.04
sj_pt_unc: 0.00
sj_matching_unc: 0.12
Combined: 0.19


In [20]:
# LP scale factor for the events passing both the VV tagger and VV pT cuts.
vvsf, unc, uncs = postprocessing.get_lpsf(VV_pt_cut_events)

# Individual uncertainties combined in quadrature (L2 norm).
tot_rel_unc = np.linalg.norm(list(uncs.values()))

print(f"SF: {vvsf:.2f} ± {unc:.2f}")
for source, size in uncs.items():
    print(f"{source}: {size:.2f}")
print(f"Combined: {tot_rel_unc:.2f}")

SF: 0.73 ± 0.18
syst_unc: 0.22
stat_unc: 0.07
sj_pt_unc: 0.02
sj_matching_unc: 0.10
Combined: 0.25


In [37]:
# LP scale factor for the events passing the bb tagger cut (VV=False).
vvsf, unc, uncs = postprocessing.get_lpsf(bb_cut_events, VV=False)

# Individual uncertainties combined in quadrature (L2 norm).
tot_rel_unc = np.linalg.norm(list(uncs.values()))

print(f"SF: {vvsf:.2f} ± {unc:.2f}")
for source, size in uncs.items():
    print(f"{source}: {size:.2f}")
print(f"Combined: {tot_rel_unc:.2f}")

SF: 0.87 ± 0.17
syst_unc: 0.19
stat_unc: 0.05
sj_pt_unc: 0.02
sj_matching_unc: 0.02
Combined: 0.20


In [46]:
# Formatted bb LP scale factors: one row per Txbb working point, one entry per
# bb-jet pT bin.
pt_bins = np.array([450, 500, 600, 100000])
bbsfs = []

# Loop-invariant: the bb-jet pT depends on neither the working point nor the bin.
bb_pt = utils.get_feat(events, "bbFatJetPt", bb_masks)

for wp in ["HP", "MP", "LP"]:
    tarr = []
    for low, high in zip(pt_bins[:-1], pt_bins[1:]):
        # Build the pT-bin mask once and use it for both the events and the masks.
        in_pt_bin = (bb_pt >= low) & (bb_pt < high)

        # Explicit copy: columns are overwritten below and `events` must stay untouched.
        cut_events = events[in_pt_bin].copy()
        cut_bb_masks = bb_masks[in_pt_bin]

        for key in ["lp_sf_nom", "lp_sf_toys", "lp_sf_sys_down", "lp_sf_sys_up"]:
            # normalise to unit mean within the pT bin, before the tagger cut
            cut_events[f"bb_{key}"] = cut_events[f"bb_{key}"] / np.mean(
                cut_events[f"bb_{key}"], axis=0
            )

        # Keep only the events above this working point's Txbb threshold.
        cut_events = cut_events[
            utils.get_feat(cut_events, "bbFatJetParticleNetMD_Txbb", cut_bb_masks)
            > postprocessing.txbb_wps[year][wp]
        ]

        sf, unc, uncs = postprocessing.get_lpsf(cut_events, VV=False)

        tarr.append(f"{sf:.2f} ± {unc:.2f}")

    bbsfs.append(tarr)

In [47]:
# Show the formatted scale factors as a table (one row per entry of bbsfs).
pd.DataFrame(data=bbsfs)

Unnamed: 0,0,1,2
0,0.82 ± 0.04,0.86 ± 0.13,0.84 ± 0.28
1,0.85 ± 0.05,0.88 ± 0.15,0.88 ± 0.29
2,0.91 ± 0.07,0.93 ± 0.15,0.89 ± 0.29
