In [67]:
import utils
import plotting
import postprocessing
import corrections

from utils import CUT_MAX_VAL
from hh_vars import (
    years,
    data_key,
    qcd_key,
    bg_keys,
    samples,
    nonres_sig_keys,
    res_samples,
    res_sig_keys,
    txbb_wps,
    jec_shifts,
    jmsr_shifts,
)
from postprocessing import (
    res_shape_vars,
    res_selection_regions,
    selection_regions_label,
    # selection_regions_year,
)

import numpy as np
import pandas as pd
import pickle, json
from pandas.errors import SettingWithCopyWarning
from hist import Hist

import os
from copy import deepcopy
from inspect import cleandoc
import warnings

# Silence pandas' SettingWithCopyWarning for the whole session; the author
# judged these warnings not to apply to this notebook.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# (utils, plotting, postprocessing, ...) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Resonant (mX, mY) signal points, grouped into the sets that are drawn together
# on one plot: a scan over mX at mY = 125, a set of intermediate points, and a
# scan over mY at mX = 3000.
sig_split_points = [
    [(mass_x, 125) for mass_x in (1000, 1400, 1800, 2200, 3000)],
    [
        (1400, 150),
        (1800, 150),
        (1800, 190),
        (2200, 150),
        (2200, 190),
        (2200, 250),
    ],
    [(3000, mass_y) for mass_y in (125, 150, 190, 250, 350)],
]

# One list of sample labels per group; every group starts with the "HHbbVV"
# label followed by the labels of its resonant points.
sig_splits = [
    ["HHbbVV", *(f"X[{mass_x}]->H(bb)Y[{mass_y}](VV)" for mass_x, mass_y in points)]
    for points in sig_split_points
]

In [69]:
# Run configuration: input sample locations, the data-taking year to process,
# and the output locations (tagged by `date`) for plots and templates.
# All paths are relative to the notebook's working directory.
MAIN_DIR = "../../../"
samples_dir = f"{MAIN_DIR}/../data/skimmer/Feb24"
signal_samples_dir = f"{MAIN_DIR}/../data/skimmer/Mar10_2"
year = "2017"

date = "Mar24"
plot_dir = f"../../../plots/PostProcessing/{date}/"
templates_dir = f"templates/{date}/"

# Create the output directories up front. os.makedirs is used instead of
# shelling out to `mkdir -p`: it does not pass the paths through a shell
# (so spaces/special characters are safe) and it raises if a directory
# cannot be created, rather than silently returning a non-zero exit code.
for i in range(len(sig_splits)):
    # one control-plot directory per signal split
    os.makedirs(f"{plot_dir}/ControlPlots/{year}/sigs{i}/", exist_ok=True)
os.makedirs(f"{plot_dir}/cutflows/", exist_ok=True)
os.makedirs(f"{plot_dir}/templates/wshifts", exist_ok=True)
os.makedirs(f"{plot_dir}/templates/jshifts", exist_ok=True)
os.makedirs(f"{templates_dir}", exist_ok=True)

Load samples

In [57]:
# Pre-selection filters handed to the loader: both fat jets must have a
# regressed (ParticleNet) mass of at least 50. The electron veto is left
# commented out, i.e. it is currently NOT applied.
filters = [
    [
        ("('ak8FatJetParticleNetMass', '0')", ">=", 50),
        ("('ak8FatJetParticleNetMass', '1')", ">=", 50),
        # ("('nGoodElectrons', '0')", "==", 0),
    ],
]

# cutflow table (pandas), one row per sample label
cutflow = pd.DataFrame(index=[*samples.keys(), *res_samples.keys()])

# filled in by later cells
systematics = {}

# utils.remove_empty_parquets(samples_dir, year)
# resonant signal samples first, then everything listed in `samples`,
# merged into a single {sample label: events} dictionary
events_dict = utils.load_samples(signal_samples_dir, res_samples, year, filters)
events_dict.update(utils.load_samples(samples_dir, samples, year, filters))

utils.add_to_cutflow(events_dict, "BDTPreselection", "weight", cutflow)

print("")
# report the summed "weight" column of every loaded sample
for sample in events_dict:
    tot_weight = events_dict[sample]["weight"].values.sum()
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Loaded NMSSM_XToYH_MX1000_MY125_HTo2bYTo2W_hadronicDecay : 39510 entries
Loaded NMSSM_XToYH_MX1400_MY125_HTo2bYTo2W_hadronicDecay : 55362 entries
Loaded NMSSM_XToYH_MX1400_MY150_HTo2bYTo2W_hadronicDecay : 55866 entries
Loaded NMSSM_XToYH_MX1800_MY125_HTo2bYTo2W_hadronicDecay : 61954 entries
Loaded NMSSM_XToYH_MX1800_MY150_HTo2bYTo2W_hadronicDecay : 63425 entries
Loaded NMSSM_XToYH_MX1800_MY190_HTo2bYTo2W_hadronicDecay : 61065 entries
Loaded NMSSM_XToYH_MX2200_MY125_HTo2bYTo2W_hadronicDecay : 64229 entries
Loaded NMSSM_XToYH_MX2200_MY150_HTo2bYTo2W_hadronicDecay : 63686 entries
Loaded NMSSM_XToYH_MX2200_MY190_HTo2bYTo2W_hadronicDecay : 62670 entries
Loaded NMSSM_XToYH_MX2200_MY250_HTo2bYTo2W_hadronicDecay : 58108 entries
Loaded NMSSM_XToYH_MX3000_MY125_HTo2bYTo2W_hadronicDecay : 66929 entries
Loaded NMSSM_XToYH_MX3000_MY150_HTo2bYTo2W_hadronicDecay : 71414 entries
Loaded NMSSM_XToYH_MX3000_MY190_HTo2bYTo2W_hadronicDecay : 69635 entries
Loaded NMSSM_XToYH_MX3000_MY250_HTo2bYTo2W_hadronic

Scale factors and bb VV assignment

In [58]:
# Apply the event weights for `year`. `cutflow` is passed in as well; in the
# displayed table it gains the "TriggerEffs" and "QCD SF" columns after this call.
postprocessing.apply_weights(events_dict, year, cutflow)
# Per-sample masks from the bb / VV jet assignment; reused by the selection,
# control-plot and template cells below.
bb_masks = postprocessing.bb_VV_assignment(events_dict)
# show the updated cutflow table
cutflow


QCD_SCALE_FACTOR = 0.8144326761300358


Unnamed: 0,BDTPreselection,TriggerEffs,QCD SF
HHbbVV,3.087472,1.781177,1.781177
QCD,2300678.0,1196074.0,974121.7
TT,156583.3,101282.2,101282.2
ST,10944.84,7371.344,7371.344
V+Jets,63569.34,38913.28,38913.28
Diboson,987.6074,625.4659,625.4659
Data,1122314.0,1122314.0,1122314.0
X[1000]->H(bb)Y[125](VV),19.53725,16.50731,16.50731
X[1400]->H(bb)Y[125](VV),27.4038,26.48171,26.48171
X[1400]->H(bb)Y[150](VV),26.45352,25.61982,25.61982


In [59]:
# Add the ParT mass-decorrelated "HWW vs QCD + Top" discriminant to every sample:
#   (probHWW3q + probHWW4q) / (probHWW3q + probHWW4q + probQCD + probT)
# NOTE: this cell is not idempotent - each run appends another
# "ak8FatJetParTMD_THWWvsT" column block, so run it once per load.
for sample, events in events_dict.items():
    hww_prob = events["ak8FatJetParTMD_probHWW3q"] + events["ak8FatJetParTMD_probHWW4q"]
    h4qvst = hww_prob / (
        hww_prob + events["ak8FatJetParTMD_probQCD"] + events["ak8FatJetParTMD_probT"]
    )

    # label the new columns with the discriminant name before attaching them
    h4qvst_cols = pd.concat([h4qvst], axis=1, keys=["ak8FatJetParTMD_THWWvsT"])
    events_dict[sample] = pd.concat([events, h4qvst_cols], axis=1)

Control Plots

In [None]:
# Variables to draw, as {variable name: (binning, axis label)}.
# Entries that are commented out are kept for reference and are not plotted.
control_plot_vars = {
    # "MET_pt": ([50, 0, 300], r"$p^{miss}_T$ (GeV)"),
    # "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"),
    # "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"),
    # "DijetMass": (
    #     list(range(800, 1000, 50)) + list(range(1000, 2000, 100)) + list(range(2000, 4001, 200)),
    #     r"$m^{jj}$ (GeV)",
    # ),
    # "bbFatJetEta": ([50, -2.4, 2.4], r"$\eta^{bb}$"),
    # "bbFatJetPt": ([50, 300, 1500], r"$p^{bb}_T$ (GeV)"),
    # "bbFatJetParticleNetMass": ([40, 52.5, 252.5], r"$m^{bb}_{reg}$ (GeV)"),
    # "bbFatJetMsd": ([50, 0, 300], r"$m^{bb}_{msd}$ (GeV)"),
    # "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$p^{bb}_{Txbb}$"),
    # "VVFatJetEta": ([50, -2.4, 2.4], r"$\eta^{VV}$"),
    # "VVFatJetPt": ([50, 300, 1500], r"$p^{VV}_T$ (GeV)"),
    # "VVFatJetParticleNetMass": (
    #     list(range(50, 180, 10)) + list(range(180, 301, 15)),
    #     r"$m^{VV}_{reg}$ (GeV)",
    # ),
    # "VVFatJetMsd": ([50, 0, 300], r"$m^{VV}_{msd}$ (GeV)"),
    # "VVFatJetParticleNet_Th4q": ([50, 0, 1], r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)"),
    # "VVFatJetParTMD_THWW4q": (
    #     [50, 0, 1],
    #     r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)",
    # ),
    # "VVFatJetParTMD_probT": ([50, 0, 1], r"Prob(Top) (Mass-Decorrelated)"),
    "VVFatJetParTMD_THWWvsT": (
        [50, 0, 1],
        r"Prob($H \to VV \to 4q$) vs Prob(QCD + Top) (Mass-Decorrelated)",
    ),
    # "bbFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{bb}_T / p_T^{jj}$"),
    # "VVFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{VV}_T / p_T^{jj}$"),
    # "VVFatJetPtOverbbFatJetPt": ([50, 0.4, 2.0], r"$p^{VV}_T / p^{bb}_T$"),
    # "nGoodMuons": ([3, 0, 3], r"# of Muons"),
    # "nGoodElectrons": ([3, 0, 3], r"# of Electrons"),
    # "nGoodJets": ([5, 0, 5], r"# of AK4 B-Jets"),
}

# all signals (non-resonant followed by resonant) and the per-year output directory
control_plot_sig_keys = nonres_sig_keys + res_sig_keys
control_plots_dir = f"{plot_dir}/ControlPlots/{year}/"

# draw the selected variables for every signal split and keep the returned
# result in `hists`
hists = postprocessing.control_plots(
    events_dict,
    bb_masks,
    control_plot_sig_keys,
    control_plot_vars,
    control_plots_dir,
    year,
    sig_splits=sig_splits,
    show=True,
)

Overall LP SF

In [61]:
from collections import OrderedDict
from tqdm import tqdm

# Apply this year's "lpsf" selection region to all samples, continuing from
# the existing cutflow.
sel, cf = utils.make_selection(
    res_selection_regions[year]["lpsf"], events_dict, bb_masks, prev_cutflow=cutflow
)

# per-signal summary rows, turned into a table in the next cell
sf_table = OrderedDict()

for sig_key in tqdm(nonres_sig_keys + res_sig_keys):
    # calculate only for current year
    sig_events = postprocessing.postprocess_lpsfs(events_dict[sig_key])
    events_dict[sig_key] = sig_events
    lp_sf, unc, uncs = postprocessing.get_lpsf(sig_events, sel[sig_key])
    # print(f"BDT LP Scale Factor for {sig_key}: {lp_sf:.2f} ± {unc:.2f}")
    # print(uncs)

    # store the scale factor together with its relative uncertainty (unc / lp_sf)
    systematics[sig_key] = {"lp_sf": lp_sf, "lp_sf_unc": unc / lp_sf}

    # formatted "SF ± unc" string plus the individual uncertainty components
    sf_table[sig_key] = {"SF": f"{lp_sf:.2f} ± {unc:.2f}", **uncs}

100%|██████████| 16/16 [00:21<00:00,  1.32s/it]


In [62]:
# Tabulate the LP scale factors and their uncertainty components:
# one row per signal sample, one column per entry of that sample's sf_table record.
all_sig_keys = nonres_sig_keys + res_sig_keys
sf_df = pd.DataFrame(index=all_sig_keys)

# Take the column names from the last signal's record explicitly, instead of
# relying on a `sig_key` loop variable left over from another cell (hidden
# notebook state; it would not even exist if the signal list were empty).
sf_columns = sf_table[all_sig_keys[-1]] if all_sig_keys else {}

for key in sf_columns:
    sf_df[key] = [sf_table[skey][key] for skey in all_sig_keys]

# Copy the table to the system clipboard for pasting elsewhere.
# NOTE(review): this needs a clipboard backend and may fail on a headless/remote kernel.
sf_df.to_clipboard()
sf_df

Unnamed: 0,SF,syst_unc,stat_unc,sj_pt_unc,sj_matching_unc
HHbbVV,0.74 ± 0.12,0.12394,0.036727,0.005133,0.098894
X[1000]->H(bb)Y[125](VV),0.74 ± 0.12,0.113228,0.071632,0.002569,0.093157
X[1400]->H(bb)Y[125](VV),0.74 ± 0.08,7e-05,0.034107,0.028624,0.096366
X[1400]->H(bb)Y[150](VV),0.76 ± 0.08,0.036601,0.03678,0.021558,0.083515
X[1800]->H(bb)Y[125](VV),0.73 ± 0.11,0.044069,0.031941,0.089167,0.101448
X[1800]->H(bb)Y[150](VV),0.74 ± 0.11,0.093565,0.040264,0.079712,0.075407
X[1800]->H(bb)Y[190](VV),0.73 ± 0.12,0.116551,0.030642,0.085897,0.081676
X[2200]->H(bb)Y[125](VV),0.80 ± 0.17,0.071528,0.028014,0.164965,0.109556
X[2200]->H(bb)Y[150](VV),0.73 ± 0.18,0.161876,0.029707,0.162967,0.085026
X[2200]->H(bb)Y[190](VV),0.71 ± 0.21,0.232348,0.054108,0.165472,0.070042


In [63]:
# Multiply both final-weight columns of every signal sample by that sample's
# LP scale factor.
# NOTE: the scaling is done in place, so re-running this cell applies the
# scale factor again on top of the previous run.
lp_weight_keys = ["finalWeight", "finalWeight_noTrigEffs"]

for sig_key in nonres_sig_keys + res_sig_keys:
    sig_lp_sf = systematics[sig_key]["lp_sf"]
    sig_events = events_dict[sig_key]
    for wkey in lp_weight_keys:
        sig_events[wkey] *= sig_lp_sf

Templates

In [64]:
# h = postprocessing.get_templates(
#     events_dict,
#     bb_masks,
#     year,
#     nonres_sig_keys + res_sig_keys,
#     res_selection_regions[year],
#     res_shape_vars,
#     plot_dir=plot_dir,
#     prev_cutflow=cutflow,
#     sig_splits=sig_splits,
#     weight_shifts={},
#     jshift="",
#     # plot_shifts=False,
#     pass_ylim=5,
#     fail_ylim=460,
#     blind_pass=True,
#     show=False,
# )

In [72]:
# Templates from every pass: the nominal one ("") and one per JEC / JMSR shift.
templates = {}

# keyword options that are identical for every get_templates call
template_opts = dict(
    prev_cutflow=cutflow,
    sig_splits=sig_splits,
    weight_shifts=postprocessing.weight_shifts,
    pass_ylim=5,
    fail_ylim=460,
    blind_pass=True,
    show=False,
    plot_shifts=False,
)

for jshift in ["", *jec_shifts, *jmsr_shifts]:
    print(jshift)
    is_nominal = jshift == ""

    ttemps, tsyst = postprocessing.get_templates(
        events_dict,
        bb_masks,
        year,
        nonres_sig_keys + res_sig_keys,
        res_selection_regions[year],
        res_shape_vars,
        # only the nominal pass is given a plot directory
        plot_dir=plot_dir if is_nominal else "",
        jshift=jshift,
        **template_opts,
    )

    # merge this pass into the running collection (later keys win)
    templates.update(ttemps)

    # the per-year systematics are taken from the nominal pass only
    if is_nominal:
        systematics[year] = tsyst


pass
fail


KeyboardInterrupt: 

In [73]:
# display the systematics collected so far
systematics

{'HHbbVV': {'lp_sf': 0.7398819221312001, 'lp_sf_unc': 0.16283816599413997},
 'X[1000]->H(bb)Y[125](VV)': {'lp_sf': 0.7381843897909458,
  'lp_sf_unc': 0.16320690238042973},
 'X[1400]->H(bb)Y[125](VV)': {'lp_sf': 0.7360006527756711,
  'lp_sf_unc': 0.10615540339225933},
 'X[1400]->H(bb)Y[150](VV)': {'lp_sf': 0.7580473506110272,
  'lp_sf_unc': 0.10065754090912249},
 'X[1800]->H(bb)Y[125](VV)': {'lp_sf': 0.7327312649242111,
  'lp_sf_unc': 0.14561852219659716},
 'X[1800]->H(bb)Y[150](VV)': {'lp_sf': 0.7378678860628718,
  'lp_sf_unc': 0.14971905152700934},
 'X[1800]->H(bb)Y[190](VV)': {'lp_sf': 0.7329480898931854,
  'lp_sf_unc': 0.16903369661848308},
 'X[2200]->H(bb)Y[125](VV)': {'lp_sf': 0.7955666102753871,
  'lp_sf_unc': 0.2124073730033089},
 'X[2200]->H(bb)Y[150](VV)': {'lp_sf': 0.728456956395807,
  'lp_sf_unc': 0.24672602176163788},
 'X[2200]->H(bb)Y[190](VV)': {'lp_sf': 0.7074723256762374,
  'lp_sf_unc': 0.2986638289700003},
 'X[2200]->H(bb)Y[250](VV)': {'lp_sf': 0.7275312381871016,
  'l

In [77]:
# Saving the templates themselves is currently disabled:
# with open(f"{templates_dir}/{year}_templates.pkl", "wb") as f:
#     pickle.dump(templates, f)

# Write the collected systematics as JSON into the templates directory.
with open(f"{templates_dir}/systematics.json", "w") as f:
    f.write(json.dumps(systematics))

In [78]:
# display the systematics again, as written to systematics.json
systematics

{'HHbbVV': {'lp_sf': 0.7398819221312001, 'lp_sf_unc': 0.16283816599413997},
 'X[1000]->H(bb)Y[125](VV)': {'lp_sf': 0.7381843897909458,
  'lp_sf_unc': 0.16320690238042973},
 'X[1400]->H(bb)Y[125](VV)': {'lp_sf': 0.7360006527756711,
  'lp_sf_unc': 0.10615540339225933},
 'X[1400]->H(bb)Y[150](VV)': {'lp_sf': 0.7580473506110272,
  'lp_sf_unc': 0.10065754090912249},
 'X[1800]->H(bb)Y[125](VV)': {'lp_sf': 0.7327312649242111,
  'lp_sf_unc': 0.14561852219659716},
 'X[1800]->H(bb)Y[150](VV)': {'lp_sf': 0.7378678860628718,
  'lp_sf_unc': 0.14971905152700934},
 'X[1800]->H(bb)Y[190](VV)': {'lp_sf': 0.7329480898931854,
  'lp_sf_unc': 0.16903369661848308},
 'X[2200]->H(bb)Y[125](VV)': {'lp_sf': 0.7955666102753871,
  'lp_sf_unc': 0.2124073730033089},
 'X[2200]->H(bb)Y[150](VV)': {'lp_sf': 0.728456956395807,
  'lp_sf_unc': 0.24672602176163788},
 'X[2200]->H(bb)Y[190](VV)': {'lp_sf': 0.7074723256762374,
  'lp_sf_unc': 0.2986638289700003},
 'X[2200]->H(bb)Y[250](VV)': {'lp_sf': 0.7275312381871016,
  'l

In [None]:
# Load the pickled templates of every year into {year: templates}.
# NOTE(review): the matching pickle.dump in the save cell is commented out, so
# these files must already exist from an earlier run.
templates_dict = {}

# A dedicated loop variable is used on purpose: looping with `year` would
# overwrite the notebook-level `year` from the configuration cell, and any
# earlier cell re-run afterwards would silently process the wrong year.
for template_year in years:
    # Same location the save cell writes to (templates_dir is "templates/{date}/").
    # SECURITY: pickle.load can execute arbitrary code - only load files that
    # were produced by this analysis.
    with open(f"{templates_dir}/{template_year}_templates.pkl", "rb") as f:
        templates_dict[template_year] = pickle.load(f)