# Combining VBF and ggF templates

**Note:** this notebook needs to be updated, since the pass regions are now named `pass_ggf` and `pass_vbf` even in the separate templates.

Author(s): Raghav Kansal

In [None]:
from __future__ import annotations

import json
import pickle
from pathlib import Path

from HHbbVV.hh_vars import years

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Input directories: each is read below for per-year "{year}_templates.pkl" files
# and a "systematics.json" file.
ggf_templates_path = Path("templates/24Apr12ggFOR")
vbf_templates_path = Path("templates/24Apr8VBFHP999")
# Output directory for the combined templates and systematics.
templates_path = Path("templates/24Apr12NonresCombinedggFOR")
# No parents=True: the parent "templates" directory must already exist.
templates_path.mkdir(exist_ok=True)

## Combining templates

In [None]:
# Combined templates for every year, keyed by year.
templates = {}

for year in years:
    templates_file = f"{year}_templates.pkl"

    # NOTE: pickle.load executes arbitrary code; only load template files you trust.
    with (ggf_templates_path / templates_file).open("rb") as fin:
        ggf_templates = pickle.load(fin)

    with (vbf_templates_path / templates_file).open("rb") as fin:
        vbf_templates = pickle.load(fin)

    # ggF regions are copied over unchanged: their pass regions no longer need
    # renaming (presumably they are already stored under "passggf" names).
    combined_templates = dict(ggf_templates.items())

    for region, vbf_template in vbf_templates.items():
        if not region.startswith("pass"):
            # fail regions must be identical between the ggF and VBF templates
            assert combined_templates[region] == vbf_template
            continue

        # VBF pass regions get a "passvbf" name
        combined_templates[region.replace("pass", "passvbf")] = vbf_template

    # write the combined templates for this year next to the other outputs
    with (templates_path / templates_file).open("wb") as fout:
        pickle.dump(combined_templates, fout)

    templates[year] = combined_templates

## Combining systematics

In [None]:
with (ggf_templates_path / "systematics.json").open("r") as fin:
    ggf_systematics = json.load(fin)

with (vbf_templates_path / "systematics.json").open("r") as fin:
    vbf_systematics = json.load(fin)

# Top-level layout: one entry per pass region, followed by one entry per year.
# The ggF pass-region entry is taken over as-is; the VBF one is filled below.
combined_systematics = {"passvbf": {}, "passggf": ggf_systematics["passggf"]}

# Per-year ggF systematics are copied region by region without renaming.
# Non-year ggF keys other than "passggf" are not carried over.
for skey, sval in ggf_systematics.items():
    if skey not in years:
        continue

    combined_systematics[skey] = dict(sval.items())

for skey, sval in vbf_systematics.items():
    if skey not in years:
        # LP SFs
        combined_systematics["passvbf"][skey] = sval
        continue

    for region, val in sval.items():
        if not region.startswith("pass"):
            # fail regions must be identical between the ggF and VBF systematics
            assert combined_systematics[skey][region] == val
            continue

        # VBF pass regions get a "passvbf" name
        combined_systematics[skey][region.replace("pass", "passvbf")] = val

with (templates_path / "systematics.json").open("w") as fout:
    json.dump(combined_systematics, fout, indent=4)

TODO: Combining cutflows

## Testing JEC JMC plots

In [None]:
import warnings
from pathlib import Path

import datacardHelpers
import hist
import plotting
import postprocessing
import regions
import utils
from hist import Hist

from HHbbVV.hh_vars import bg_keys, jecs, jmsr, qcd_key

In [None]:
# Base directory, three levels above the working directory
# (presumably the repository root — verify).
MAIN_DIR = Path("../../../")

# Root directory for the plots; the plotting cells create "wshifts" subdirectories
# under it (per year and for all years combined).
main_plot_dir = MAIN_DIR / "plots/PostProcessing/24Apr12NonresCombinedggFOR/Templates"

In [None]:
def combine_hists(*hists):
    """Stack several histograms along their first (sample) axis.

    The returned histogram has a ``StrCategory`` axis named "Sample" holding the
    categories of every input's first axis, in input order, followed by the
    remaining axes of the first input. It uses weight storage.

    Assumes all inputs share the same non-sample axes as ``hists[0]`` and that
    sample names are unique across inputs — TODO confirm against callers.

    Args:
        *hists: histograms whose first axis lists the sample names.

    Returns:
        A new histogram with each sample's contents (flow bins included) copied in.
    """
    sample_names = [sample for h in hists for sample in h.axes[0]]

    combined = Hist(
        hist.axis.StrCategory(sample_names, name="Sample"),
        *hists[0].axes[1:],
        storage="weight",
    )

    for h in hists:
        for sample in h.axes[0]:
            sample_values = h[sample, ...].view(flow=True)
            # presumably the position of ``sample`` on the "Sample" axis
            sample_idx = utils.get_key_index(combined, sample)
            combined.view(flow=True)[sample_idx, ...] = sample_values

    return combined

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import mplhep as hep

# Select the CMS style from mplhep, both through matplotlib and through mplhep itself.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")
# Tick formatter with math-text exponents; power limits are set to (-3, 3).
# It is not referenced in the cells visible here.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# this is needed for some reason to update the font size for the first plot
# (a throwaway figure is created, the font size set, and the figure closed again)
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
plt.rcParams.update({"font.size": 24})
plt.close()

In [None]:
# Ad-hoc check that "HHbbVV" is a category on the first axis of `cjt`.
# NOTE: `cjt` is only assigned in the plotting cells further down, so one of them
# must have been run before this cell.
"HHbbVV" in cjt.axes[0]

In [None]:
# Silence all warnings for the rest of the session (the plotting loops are noisy).
warnings.filterwarnings("ignore")

# Signal samples, and every background except QCD.
p_sig_keys = ["HHbbVV", "VBFHHbbVV"]
p_bg_keys = [key for key in bg_keys if key != qcd_key]
p_all_keys = p_sig_keys + p_bg_keys

# Passed to ratioHistPlot as `sig_scale_dict` (presumably per-signal scale factors).
sig_scale_dict = {
    "HHbbVV": 50,
    "VBFHHbbVV": 100,
    # "qqHH_CV_1_C2V_0_kl_1_HHbbVV": 10,
    # "qqHH_CV_1_C2V_2_kl_1_HHbbVV": 10,
}

sig_key = "HHbbVV"

# JEC and JMSR shifts, built once instead of on every loop iteration.
jshifts = jecs | jmsr

# The JEC/JMSR plots were skipped by an unconditional `continue`, which left the
# code building them unreachable. Keep them off by default; set to True to make them.
plot_jshifts = False

for year, temps in templates.items():
    plot_dir = main_plot_dir / year / "wshifts"
    plot_dir.mkdir(exist_ok=True, parents=True)

    for rname, region in regions.get_nonres_selection_regions(year).items():
        if region.lpsf:
            continue

        for wshift in [*jshifts, "pileup"]:
            if wshift in jshifts:
                if not plot_jshifts:
                    continue

                # adding jshift-ed histograms into the same histogram
                cjshift_templates = [temps[rname]]
                for shift in ["up", "down"]:
                    # new histogram with sample names renamed to "{sample}_{jsf}_{shift}"
                    jt = temps[f"{rname}_{wshift}_{shift}"]
                    slabels = [f"{s}_{wshift}_{shift}" for s in jt.axes[0]]
                    rjt = Hist(
                        hist.axis.StrCategory(slabels, name="Sample"),
                        *jt.axes[1:],
                        storage="weight",
                    )
                    rjt.view()[...] = jt.view()
                    cjshift_templates.append(rjt)

                cjt = combine_hists(*cjshift_templates)
                shift_label = wshift
            else:
                # weight-based shifts are read from the nominal template of the region
                shift_label = postprocessing.weight_shifts[wshift].label
                cjt = temps[rname]

            plotting.sigErrRatioPlot(
                cjt,
                year,
                sig_key,
                wshift,
                f"{region.label} Region {shift_label} Variations",
                plot_dir,
                f"{rname}_sig_{wshift}",
                show=False,
            )

            plotting.ratioHistPlot(
                cjt,
                year,
                [],
                p_bg_keys,
                sig_scale_dict=sig_scale_dict,
                syst=(wshift, p_all_keys),
                # use the readable shift label, consistent with the signal plot above
                title=f"{region.label} Region {shift_label} Variations",
                plot_ratio=False,
                plot_data=True,
                bg_err_type="line",
                # plot_signal=False,
                name=f"{plot_dir}/{rname}_bg_{wshift}.pdf",
                show=False,
            )

In [None]:
plot_dir = main_plot_dir / "wshifts"
plot_dir.mkdir(exist_ok=True, parents=True)

# Templates combined over all years (presumably summed per region — see datacardHelpers).
temps = datacardHelpers.sum_templates(templates, years)

# Only the name, `lpsf` flag and label of each region are used below.
# Pick the year explicitly rather than relying on a `year` variable leaked from an
# earlier cell's loop; this is the same value when the cells are run in order.
ref_year = list(years)[-1]

for rname, region in regions.get_nonres_selection_regions(ref_year).items():
    # only the ggF pass region is plotted here
    if region.lpsf or rname != "passggf":
        continue

    for wshift in ["FSRPartonShower", "ISRPartonShower"]:
        shift_label = postprocessing.weight_shifts[wshift].label
        cjt = temps[rname]

        plotting.sigErrRatioPlot(
            cjt,
            "all",
            sig_key,
            wshift,
            f"{region.label} Region {shift_label} Variations",
            plot_dir,
            f"{rname}_sig_{wshift}",
            show=False,
        )

        plotting.ratioHistPlot(
            cjt,
            "all",
            [],
            p_bg_keys,
            sig_scale_dict=sig_scale_dict,
            syst=(wshift, p_all_keys),
            title=f"{region.label} Region {shift_label} Variations",
            plot_ratio=False,
            plot_data=True,
            bg_err_type="line",
            # plot_signal=False,
            name=f"{plot_dir}/{rname}_bg_{wshift}.pdf",
            show=False,
        )