In [2]:
import utils
import plotting
import postprocessing
import corrections

from utils import CUT_MAX_VAL
from hh_vars import (
    years,
    data_key,
    qcd_key,
    bg_keys,
    samples,
    nonres_sig_keys,
    # res_samples,
    # res_sig_keys,
    nonres_samples,
    txbb_wps,
    jec_shifts,
    jmsr_shifts,
)
from postprocessing import res_shape_vars

from collections import OrderedDict

import numpy as np
import pandas as pd
import pickle, json
from pandas.errors import SettingWithCopyWarning
from hist import Hist

import os
from copy import deepcopy
from inspect import cleandoc
import warnings

# Silence pandas' SettingWithCopyWarning for the rest of the session.
# NOTE(review): the original author judged these warnings not to apply to this
# notebook; the filter is global, so genuine chained-assignment mistakes will
# also go unreported.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [3]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# (mX, mY) mass points of the resonant X -> H(bb) Y(VV) signal samples, in GeV.
res_mps = [
    (1000, 125),
    (1400, 125),
    (1400, 150),
    (1800, 125),
    (1800, 150),
    (1800, 190),
    (2200, 125),
    (2200, 150),
    (2200, 190),
    (2200, 250),
    (3000, 125),
    (3000, 150),
    (3000, 190),
    (3000, 250),
    (3000, 350),
]

# Map each signal's display label to its sample name, keeping the order of
# `res_mps`.
res_samples = OrderedDict(
    (
        f"X[{mX}]->H(bb)Y[{mY}](VV)",
        f"NMSSM_XToYH_MX{mX}_MY{mY}_HTo2bYTo2W_hadronicDecay",
    )
    for mX, mY in res_mps
)

# Display labels of all resonant signals, in the same order.
res_sig_keys = list(res_samples)

In [5]:
# Merge the non-resonant signal samples into `samples`, rebinding the name that
# was imported from hh_vars.
# NOTE(review): assuming both operands are dicts, `|` lists the `nonres_samples`
# keys first and keeps the value from `samples` for any key present in both —
# confirm.
samples = nonres_samples | samples

In [6]:
# Groups of (mX, mY) mass points; one entry of `sig_splits` is built per group.
# NOTE(review): (3000, 125) appears in both the first and the third group —
# confirm this duplication is intentional.
sig_split_points = [
    [(1000, 125), (1400, 125), (1800, 125), (2200, 125), (3000, 125)],
    [(1400, 150), (1800, 150), (1800, 190), (2200, 150), (2200, 190), (2200, 250)],
    [(3000, 125), (3000, 150), (3000, 190), (3000, 250), (3000, 350)],
]

# Each split lists "HHbbVV" first, followed by the labels of that group's
# resonant mass points.
sig_splits = [
    ["HHbbVV", *(f"X[{mX}]->H(bb)Y[{mY}](VV)" for mX, mY in mps)]
    for mps in sig_split_points
]

In [10]:
# Input locations, analysis year and output locations for this notebook.
MAIN_DIR = "../../../"
samples_dir = f"{MAIN_DIR}/../data/skimmer/Feb24"
signal_samples_dir = f"{MAIN_DIR}/../data/skimmer/Mar10_2"
year = "2017"

date = "23Apr30"
plot_dir = f"../../../plots/PostProcessing/{date}/"
templates_dir = f"templates/{date}/"

# Create every output directory up front. os.makedirs(exist_ok=True) is the
# in-process equivalent of `mkdir -p`: it avoids spawning a shell with
# interpolated paths and raises on failure (e.g. permissions) instead of
# silently discarding the exit status.
output_dirs = [f"{plot_dir}/ControlPlots/{year}/sigs{i}/" for i in range(len(sig_splits))]
output_dirs += [
    f"{plot_dir}/cutflows/",
    f"{plot_dir}/templates/wshifts",
    f"{plot_dir}/templates/jshifts",
    f"{plot_dir}/templates/hists2d",
    templates_dir,
]
for output_dir in output_dirs:
    os.makedirs(output_dir, exist_ok=True)

# Selection regions for the chosen year, used by the LP SF and template cells.
selection_regions = postprocessing.get_res_selection_regions(year)

Load samples

In [11]:
# Preselection applied while loading: regressed mass of both fat jets >= 50
# and no good electrons (electron veto).
filters = [
    [
        ("('ak8FatJetParticleNetMass', '0')", ">=", 50),
        ("('ak8FatJetParticleNetMass', '1')", ">=", 50),
        ("('nGoodElectrons', '0')", "==", 0),
    ],
]
systematics = {}

# Cutflow table with one row per sample label; columns are added as cuts are applied.
cutflow = pd.DataFrame(index=[*samples.keys(), *res_samples.keys()])

# utils.remove_empty_parquets(samples_dir, year)
# Resonant signals live in their own directory; everything else comes from
# `samples_dir`. Both are merged into a single events dictionary.
events_dict = utils.load_samples(signal_samples_dir, res_samples, year, filters)
events_dict |= utils.load_samples(samples_dir, samples, year, filters)

utils.add_to_cutflow(events_dict, "Preselection", "weight", cutflow)

print("")
# Report the summed event weight of every loaded sample.
for sample, events in events_dict.items():
    tot_weight = np.sum(events["weight"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Loaded NMSSM_XToYH_MX1000_MY125_HTo2bYTo2W_hadronicDecay : 39428 entries
Loaded NMSSM_XToYH_MX1400_MY125_HTo2bYTo2W_hadronicDecay : 55284 entries
Loaded NMSSM_XToYH_MX1400_MY150_HTo2bYTo2W_hadronicDecay : 55798 entries
Loaded NMSSM_XToYH_MX1800_MY125_HTo2bYTo2W_hadronicDecay : 61891 entries
Loaded NMSSM_XToYH_MX1800_MY150_HTo2bYTo2W_hadronicDecay : 63370 entries
Loaded NMSSM_XToYH_MX1800_MY190_HTo2bYTo2W_hadronicDecay : 61006 entries
Loaded NMSSM_XToYH_MX2200_MY125_HTo2bYTo2W_hadronicDecay : 64158 entries
Loaded NMSSM_XToYH_MX2200_MY150_HTo2bYTo2W_hadronicDecay : 63625 entries
Loaded NMSSM_XToYH_MX2200_MY190_HTo2bYTo2W_hadronicDecay : 62599 entries
Loaded NMSSM_XToYH_MX2200_MY250_HTo2bYTo2W_hadronicDecay : 58026 entries
Loaded NMSSM_XToYH_MX3000_MY125_HTo2bYTo2W_hadronicDecay : 66886 entries
Loaded NMSSM_XToYH_MX3000_MY150_HTo2bYTo2W_hadronicDecay : 71355 entries
Loaded NMSSM_XToYH_MX3000_MY190_HTo2bYTo2W_hadronicDecay : 69583 entries
Loaded NMSSM_XToYH_MX3000_MY250_HTo2bYTo2W_hadronic

In [None]:
# Additional Higgs samples: label -> sample-name selector (or tuple of selectors).
higgs_samples = OrderedDict()
higgs_samples["Hbb"] = "*HToBB"
higgs_samples["HWW"] = ("*HToWW", "*HToNonbb")
higgs_samples["HH"] = ("VBF_HHTobbVV_CV_1_C2V_1_C3_1", "GluGluToHHTo4B_node_cHHH1_preUL")

events_dict |= utils.load_samples(samples_dir, higgs_samples, year, filters)

# Rebuild the cutflow table so that it also has rows for the Higgs samples,
# then refill the preselection column for everything loaded so far.
cutflow_index = [*samples.keys(), *res_samples.keys(), *higgs_samples.keys()]
cutflow = pd.DataFrame(index=cutflow_index)
utils.add_to_cutflow(events_dict, "Preselection", "weight", cutflow)

print("")
# Report the summed event weight of every loaded sample.
for sample, events in events_dict.items():
    tot_weight = np.sum(events["weight"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

In [None]:
import matplotlib.pyplot as plt

# Overlay the per-event weight distributions of the ST and TT samples on a
# log-scale y axis and save the figure under `plot_dir`.
bins = np.arange(-20, 61, 5)

fig, ax = plt.subplots()
for proc in ("ST", "TT"):
    ax.hist(events_dict[proc]["weight"], bins, histtype="step", label=proc)
ax.set_yscale("log")
ax.set_ylabel("# Events")
ax.set_xlabel("Weights")
ax.legend()
fig.savefig(f"{plot_dir}/sttt_weights.pdf")

Scale factors and bb VV assignment

In [12]:
# Apply the weight corrections for `year`; `cutflow` is passed in and, per the
# table displayed below, gains "TriggerEffs" and "QCD SF" columns.
postprocessing.apply_weights(events_dict, year, cutflow)
# NOTE(review): presumably per-sample masks telling which fat jet is the bb
# candidate and which is the VV candidate — confirm against postprocessing.
bb_masks = postprocessing.bb_VV_assignment(events_dict)
# Display the updated cutflow table.
cutflow


QCD_SCALE_FACTOR = 0.8133597042925382


Unnamed: 0,Preselection,TriggerEffs,QCD SF
HHbbVV,3.080614,1.777757,1.777757
QCD,2297340.0,1194347.0,971433.4
TT,139361.7,90162.84,90162.84
ST,9979.986,6717.597,6717.597
V+Jets,63475.58,38860.9,38860.9
Diboson,946.7148,595.6462,595.6462
Hbb,1206.498,796.5818,796.5818
HWW,190.6274,134.8242,134.8242
HH,8.178608,4.746358,4.746358
Data,1108730.0,1108730.0,1108730.0


In [13]:
# Add the HWW-vs-(QCD + top) discriminant to every sample that lacks it:
#   THWWvsT = (HWW3q + HWW4q) / (HWW3q + HWW4q + QCD + T)
# Only values of existing keys are rebound, so iterating over items() is safe.
for sample, events in events_dict.items():
    if "ak8FatJetParTMD_THWWvsT" in events:
        continue  # discriminant already present for this sample

    prob_hww = events["ak8FatJetParTMD_probHWW3q"] + events["ak8FatJetParTMD_probHWW4q"]
    prob_total = prob_hww + events["ak8FatJetParTMD_probQCD"] + events["ak8FatJetParTMD_probT"]
    h4qvst = prob_hww / prob_total

    # Wrap the result under the new top-level column key before appending it.
    new_columns = pd.concat([h4qvst], axis=1, keys=["ak8FatJetParTMD_THWWvsT"])
    events_dict[sample] = pd.concat([events, new_columns], axis=1)

Control Plots

In [None]:
# Variables to plot, as {var: (bins, label)}. Commented-out entries are
# optional variables that can be re-enabled by uncommenting them.
control_plot_vars = {
    # "MET_pt": ([50, 0, 300], r"$p^{miss}_T$ (GeV)"),
    # "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"),
    # "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"),
    # alternative binning: list(range(800, 1400, 100)) + [1400, 1600, 2000, 3000, 4400]
    "DijetMass": ([40, 600, 4500], r"$m^{jj}$ (GeV)"),
    # "bbFatJetEta": ([50, -2.4, 2.4], r"$\eta^{bb}$"),
    # "bbFatJetPt": ([50, 300, 1500], r"$p^{bb}_T$ (GeV)"),
    "bbFatJetParticleNetMass": ([40, 52.5, 252.5], r"$m^{bb}_{reg}$ (GeV)"),
    # "bbFatJetMsd": ([50, 0, 300], r"$m^{bb}_{msd}$ (GeV)"),
    # "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$T^{bb}_{Xbb}$"),
    # "VVFatJetEta": ([50, -2.4, 2.4], r"$\eta^{VV}$"),
    # "VVFatJetPt": ([50, 300, 1500], r"$p^{VV}_T$ (GeV)"),
    # alternative binning: list(range(50, 110, 10)) + list(range(110, 200, 15)) + [200, 220, 250]
    "VVFatJetParticleNetMass": ([40, 52.5, 252.5], r"$m^{VV}_{reg}$ (GeV)"),
    # "VVFatJetMsd": ([50, 0, 300], r"$m^{VV}_{msd}$ (GeV)"),
    # "VVFatJetParticleNet_Th4q": ([50, 0, 1], r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)"),
    # "VVFatJetParTMD_THWW4q": (
    #     [50, 0, 1],
    #     r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)",
    # ),
    # "VVFatJetParTMD_probT": ([50, 0, 1], r"Prob(Top) (Mass-Decorrelated)"),
    # "VVFatJetParTMD_THWWvsT": (
    #     [50, 0, 1],
    #     r"$T^{VV}_{HWW}$",
    # ),
    # "bbFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{bb}_T / p_T^{jj}$"),
    # "VVFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{VV}_T / p_T^{jj}$"),
    # "VVFatJetPtOverbbFatJetPt": ([50, 0.4, 2.0], r"$p^{VV}_T / p^{bb}_T$"),
    # "nGoodMuons": ([3, 0, 3], r"# of Muons"),
    # "nGoodElectrons": ([3, 0, 3], r"# of Electrons"),
    # "nGoodJets": ([5, 0, 5], r"# of AK4 B-Jets"),
}

# All signals (non-resonant followed by resonant) are passed to the plotter.
control_sig_keys = nonres_sig_keys + res_sig_keys
# Backgrounds to show; alternative: bg_keys + list(higgs_samples.keys())
control_bg_keys = ["QCD", "TT", "ST", "V+Jets"]
control_plot_dir = f"{plot_dir}/ControlPlots/{year}/"

hists = postprocessing.control_plots(
    events_dict,
    bb_masks,
    control_sig_keys,
    control_plot_vars,
    control_plot_dir,
    year,
    sig_splits=sig_splits,
    bg_keys=control_bg_keys,
    show=True,
)

Overall LP SF

In [19]:
from collections import OrderedDict
from tqdm import tqdm

# Events passing the "lpsf" selection region, per sample, plus the matching cutflow.
sel, cf = utils.make_selection(
    selection_regions["lpsf"].cuts, events_dict, bb_masks, prev_cutflow=cutflow
)

# Per-signal summary row: formatted "SF ± unc" string plus the uncertainty breakdown.
sf_table = OrderedDict()
lpsf_sig_keys = nonres_sig_keys + res_sig_keys

for sig_key in tqdm(lpsf_sig_keys):
    systematics[sig_key] = {}
    # calculate only for current year
    sig_events = postprocessing.postprocess_lpsfs(events_dict[sig_key])
    events_dict[sig_key] = sig_events
    lp_sf, unc, uncs = postprocessing.get_lpsf(sig_events, sel[sig_key])

    # Store the scale factor and its relative uncertainty for later use.
    systematics[sig_key].update({"lp_sf": lp_sf, "lp_sf_unc": unc / lp_sf})

    sf_table[sig_key] = {"SF": f"{lp_sf:.2f} ± {unc:.2f}", **uncs}

100%|██████████| 16/16 [00:09<00:00,  1.65it/s]


In [20]:
# Tabulate the LP scale factors: one row per signal, one column per entry of
# that signal's `sf_table` record.
table_sig_keys = nonres_sig_keys + res_sig_keys
sf_df = pd.DataFrame(index=table_sig_keys)

# Take the column names from an explicit `sf_table` entry (the last signal)
# instead of relying on the `sig_key` loop variable leaked from another cell,
# so this cell gives the same result regardless of which loop ran last.
sf_columns = list(sf_table[table_sig_keys[-1]]) if table_sig_keys else []

for key in sf_columns:
    sf_df[key] = [sf_table[skey][key] for skey in table_sig_keys]

# Copy the table to the system clipboard (e.g. for pasting into a spreadsheet),
# then display it.
sf_df.to_clipboard()
sf_df

Unnamed: 0,SF,syst_unc,stat_unc,sj_pt_unc,sj_matching_unc
HHbbVV,0.75 ± 0.12,0.120119,0.036521,0.005156,0.099767
X[1000]->H(bb)Y[125](VV),0.74 ± 0.12,0.111157,0.07079,0.002577,0.094437
X[1400]->H(bb)Y[125](VV),0.74 ± 0.08,0.003219,0.033844,0.028656,0.096444
X[1400]->H(bb)Y[150](VV),0.76 ± 0.08,0.03875,0.03645,0.021599,0.084621
X[1800]->H(bb)Y[125](VV),0.73 ± 0.11,0.04666,0.031887,0.089474,0.10214
X[1800]->H(bb)Y[150](VV),0.74 ± 0.11,0.091469,0.039353,0.080078,0.075578
X[1800]->H(bb)Y[190](VV),0.73 ± 0.12,0.117856,0.030077,0.086102,0.082002
X[2200]->H(bb)Y[125](VV),0.79 ± 0.17,0.074176,0.027825,0.165057,0.11016
X[2200]->H(bb)Y[150](VV),0.73 ± 0.18,0.158499,0.02903,0.162866,0.085763
X[2200]->H(bb)Y[190](VV),0.71 ± 0.21,0.230285,0.052574,0.165579,0.070822


In [21]:
# Scale the signal event weights by their LP scale factor.
# WARNING: the multiplication is in place, so re-running this cell without
# reloading `events_dict` applies the scale factor a second time.
for sig_key in nonres_sig_keys + res_sig_keys:
    lp_sf_value = systematics[sig_key]["lp_sf"]
    sig_events = events_dict[sig_key]
    for wkey in ("finalWeight", "finalWeight_noTrigEffs"):
        sig_events[wkey] *= lp_sf_value

Templates

In [23]:
# Single nominal template pass: no weight shifts, no jet shift, restricted to
# the first ten resonant signals and the first two signal splits.
template_sig_keys = nonres_sig_keys + res_sig_keys[:10]
# NOTE(review): "VH(bb)" does not appear among the sample rows of the cutflow
# shown above — confirm it is actually loaded (e.g. via hh_vars.samples).
template_bg_keys = ["QCD", "TT", "V+Jets", "Diboson", "HWW", "VH(bb)", "Hbb", "HH"]

h, tsysts = postprocessing.get_templates(
    events_dict,
    bb_masks,
    year,
    template_sig_keys,
    selection_regions,
    res_shape_vars,
    bg_keys=template_bg_keys,
    plot_dir=plot_dir,
    prev_cutflow=cutflow,
    sig_splits=sig_splits[:2],
    weight_shifts={},
    jshift="",
    plot_shifts=False,
    pass_ylim=7,
    fail_ylim=40000,
    blind_pass=True,
    show=False,
)

pass

Cutflow:
                           Preselection   TriggerEffs        QCD SF  \
HHbbVV                    3.080614e+00  1.777757e+00  1.777757e+00   
QCD                       2.297340e+06  1.194347e+06  9.714334e+05   
TT                        1.393617e+05  9.016284e+04  9.016284e+04   
ST                        9.979986e+03  6.717597e+03  6.717597e+03   
V+Jets                    6.347558e+04  3.886090e+04  3.886090e+04   
Diboson                   9.467148e+02  5.956462e+02  5.956462e+02   
Hbb                       1.206498e+03  7.965818e+02  7.965818e+02   
HWW                       1.906274e+02  1.348242e+02  1.348242e+02   
HH                        8.178608e+00  4.746358e+00  4.746358e+00   
Data                      1.108730e+06  1.108730e+06  1.108730e+06   
X[1000]->H(bb)Y[125](VV)  1.949911e+01  1.647412e+01  1.647412e+01   
X[1400]->H(bb)Y[125](VV)  2.736733e+01  2.644665e+01  2.644665e+01   
X[1400]->H(bb)Y[150](VV)  2.642236e+01  2.559143e+01  2.559143e+01   
X[18

In [None]:
# Build templates for the nominal case ("") and for every JEC / JMSR shift,
# merging the results of all passes into one `templates` dictionary.
templates = {}

for jshift in [""] + jec_shifts + jmsr_shifts:
    print(jshift)
    ttemps, tsyst = postprocessing.get_templates(
        events_dict,
        bb_masks,
        year,
        nonres_sig_keys + res_sig_keys,
        # `selection_regions` was built for `year` earlier in the notebook; the
        # previous `res_selection_regions[year]` referred to a name that is
        # never defined or imported here and raised NameError on a fresh kernel.
        selection_regions,
        res_shape_vars,
        bg_keys=["QCD", "TT", "V+Jets"],
        # Plots are only produced for the nominal pass.
        plot_dir=plot_dir if jshift == "" else "",
        prev_cutflow=cutflow,
        sig_splits=sig_splits,
        weight_shifts=postprocessing.weight_shifts,
        jshift=jshift,
        pass_ylim=7,
        fail_ylim=40000,
        blind_pass=True,
        show=False,
        plot_shifts=False,
    )

    templates = {**templates, **ttemps}
    # Only the nominal pass contributes the per-year systematics.
    if jshift == "":
        systematics[year] = tsyst

In [None]:
# Persist this year's templates (pickle) and the collected systematics (JSON)
# under `templates_dir`.
templates_path = f"{templates_dir}/{year}_templates.pkl"
systematics_path = f"{templates_dir}/systematics.json"

with open(templates_path, "wb") as f:
    pickle.dump(templates, f)

with open(systematics_path, "w") as f:
    json.dump(systematics, f)

In [4]:
# Reload previously saved 2017 templates from a fixed, older output directory
# ("Apr10"), replacing whatever `templates` currently holds.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
with open("templates/Apr10//2017_templates.pkl", "rb") as pkl_file:
    templates = pickle.load(pkl_file)

In [8]:
# Sanity check: length of the third axis (index 2) of the "pass" template.
len(templates["pass"].axes[2])

10

In [None]:
# Plot the templates for the listed regions and samples into
# {plot_dir}/templates/hists2d/ (presumably as 2D histograms, going by the
# helper's name).
# NOTE(review): `selection_regions_label` is not defined or imported anywhere
# in the visible notebook, so this cell raises NameError on a fresh kernel
# unless it is defined elsewhere — confirm where it should come from.
plotting.hist2ds(
    templates,
    f"{plot_dir}/templates/hists2d/",
    regions=["pass", "fail", "passBlinded", "failBlinded"],
    region_labels=selection_regions_label,
    samples=["Data", "TT", "V+Jets", "X[3000]->H(bb)Y[190](VV)"],
    # fail_zlim=5e3,
    # pass_zlim=1.0,
)

In [None]:
# Display the collected systematics dictionary for inspection.
systematics

In [None]:
# Load the saved templates of every year for the current `date` into a
# {year: templates} dictionary.
templates_dict = {}

# Use a dedicated loop variable: looping with `year` would overwrite the
# notebook-wide `year` setting and silently change what earlier cells do when
# they are re-run.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
for template_year in years:
    with open(f"templates/{date}/{template_year}_templates.pkl", "rb") as f:
        templates_dict[template_year] = pickle.load(f)

In [None]:
# Load the saved templates of every year from the fixed "Apr7" output directory
# into a list ordered like `years`.
# NOTE(review): this rebinds `templates` (a dict in earlier cells) to a list —
# confirm downstream cells expect that.
templates = []

# Use a dedicated loop variable: looping with `year` would overwrite the
# notebook-wide `year` setting and silently change what earlier cells do when
# they are re-run.
# NOTE: pickle.load executes arbitrary code from the file; only load trusted files.
for template_year in years:
    with open(f"templates/Apr7//{template_year}_templates.pkl", "rb") as f:
        templates.append(pickle.load(f))