In [1]:
from __future__ import annotations

import pickle
from collections import OrderedDict
from pathlib import Path

import hist
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import mplhep as hep
import numpy as np
import plotting
import uproot
from datacardHelpers import sum_templates
from hist import Hist
from postprocessing import nonres_shape_vars

from HHbbVV.hh_vars import bg_keys, data_key, years
from HHbbVV.postprocessing import utils

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# Root directory that the plot output path is resolved against.
MAIN_DIR = Path("../../../")

# Region / blinding switches read by the cells below.
unblinded = True  # when False, data in the non-"fail" regions is blinded
vbf_only = False  # when True, the "passggf" region is dropped
ggf_only = False  # when True, the "passvbf" region is dropped

# Only referenced from commented-out alternatives in the visible notebook.
k2v0sig = False
nTF = 0

# Where every figure produced by this notebook is written.
plot_dir = MAIN_DIR / "plots/PostFit/25Apr11Combination"
# plot_dir = (
#     MAIN_DIR
#     / "plots/PostFit/24Apr9ggFScan/nTF1/ggf_txbb_MP_ggf_bdt_0.9965_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb"
# )
plot_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Which set of cards (sub-directory of the cards area) to read the fit shapes from.
cards_dir = "25Mar15NonresUpdateLPSFs/ggf-sig-only"
# cards_dir = f"f_tests/24Apr26NonresBDT995/ggf/nTF_{nTF}"

# Asimov fits are stored under a file name carrying an "Asimov" suffix.
asimov = False
if asimov:
    asimov_label = "Asimov"
else:
    asimov_label = ""

# Alternative locations used previously:
# f"/uscms/home/rkansal/hhcombine/cards/{cards_dir}/FitShapesB{asimov_label}.root"
# f"/uscms/home/rkansal/eos/bbVV/cards/{cards_dir}/FitShapes.root"
fit_shapes_path = f"/home/users/rkansal/HHbbVV/cards//{cards_dir}/FitShapesB{asimov_label}.root"

# Kept open on purpose: later cells read the per-region shapes from `file`.
file = uproot.open(fit_shapes_path)

In [5]:
# templates_dir = Path(f"templates/{cards_dir}")
sig_templates_dir = Path("templates/25Feb6NonresMatchingFix")
bg_templates_dir = Path("templates/24Aug26BDT995AllSigs")
# templates_dir = Path(
#     f"templates/24Apr9ggFScan/ggf_txbb_HP_ggf_bdt_0.996_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb"
# )


def _load_year_templates(templates_dir: Path) -> dict:
    """Load ``{year}_templates.pkl`` from ``templates_dir`` for every entry of ``years``.

    Args:
        templates_dir: directory holding one ``<year>_templates.pkl`` file per year.

    Returns:
        Dictionary mapping each year to the unpickled content of its file.

    Raises:
        FileNotFoundError: if the pickle for any year is missing.
    """
    per_year = {}
    for year in years:
        with (templates_dir / f"{year}_templates.pkl").open("rb") as f:
            # NOTE: pickle.load executes arbitrary code from the file; only point
            # the template directories at files produced by a trusted source.
            per_year[year] = pickle.load(f)
    return per_year


# Pre-fit signal templates, combined over all years.
sig_pre_templates = sum_templates(_load_year_templates(sig_templates_dir), years)

# Pre-fit background templates, combined over all years. `templates_dict` is
# left bound to the per-year background templates, as it was before.
templates_dict = _load_year_templates(bg_templates_dir)
bg_pre_templates = sum_templates(templates_dict, years)

In [6]:
# Name under which the data histogram is looked up in the fit-shapes file.
workspace_data_key = "data_obs"  # if not asimov else "toy_asimov"

# Ordered mapping: name in templates -> name in cards.
hist_label_map_inverse = OrderedDict(
    {
        "QCD": "CMS_bbWW_hadronic_qcd_datadriven",
        # "Diboson": "diboson",
        "TT": "ttbar",
        "ST": "singletop",
        "Z+Jets": "zjets",
        "W+Jets": "wjets",
        # "HHbbVV": "ggHH_kl_1_kt_1_hbbhww4q",
        # "VBFHHbbVV": "qqHH_CV_1_C2V_1_kl_1_HHbbww",  # change to hbbhww future
        data_key: workspace_data_key,
    }
)

# Reverse lookup: name in cards -> name in templates.
hist_label_map = dict(zip(hist_label_map_inverse.values(), hist_label_map_inverse.keys()))

# Signal samples carried through the histograms and plots.
sig_keys = ["HHbbVV", "VBFHHbbVV", "qqHH_CV_1_C2V_0_kl_1_HHbbVV", "qqHH_CV_1_C2V_2_kl_1_HHbbVV"]
# sig_keys = ["qqHH_CV_1_C2V_0_kl_1_HHbbVV"] if k2v0sig else ["HHbbVV"]

# Backgrounds that are plotted: everything in bg_keys except these three.
excluded_bg_keys = ("Diboson", "Hbb", "HWW")
pbg_keys = [bk for bk in bg_keys if bk not in excluded_bg_keys]

# Category order of the "Sample" axis: backgrounds, then signals, then data.
samples = [*pbg_keys, *sig_keys, data_key]

In [7]:
# Fit-shape sets to read (suffix used in the fit-shapes file -> display label).
shapes = {
    "prefit": "Pre-Fit",
    # "shapes_fit_s": "S+B Post-Fit",
    "postfit": "B-only Post-Fit",
}

# if unblinded:
#     shapes["postfit"] = "Post-Fit"

shape_vars = nonres_shape_vars

# Regions removed by the single-channel switches.
dropped_regions = set()
if vbf_only:
    dropped_regions.add("passggf")
if ggf_only:
    dropped_regions.add("passvbf")

# Region name -> display label, without the dropped regions.
selection_regions = {
    region_name: region_title
    for region_name, region_title in {
        "passvbf": "VBF",
        "passggf": "ggF",
        "fail": "Fail",
    }.items()
    if region_name not in dropped_regions
}

In [8]:
def _fill_sample(h, sample, values):
    """Overwrite the ``sample`` row of ``h`` (flow bins excluded) with ``values``.

    The row is found by matching ``sample`` against the labels of the first
    axis of ``h``; an ``IndexError`` is raised if the label is not on that axis.
    """
    sample_index = np.where(np.array(list(h.axes[0])) == sample)[0][0]
    h.view(flow=False)[sample_index, :] = values


# hists[shape][region]: histogram with a "Sample" category axis plus the shape variable axes.
# bgerrs[shape][region]: per-bin errors of the "TotalBkg" histogram in the fit-shapes file.
hists = {}
bgerrs = {}

for shape in shapes:
    print(shape)
    hists[shape] = {
        region: Hist(
            hist.axis.StrCategory(samples, name="Sample"),
            *[shape_var.axis for shape_var in shape_vars],
            storage="double",
        )
        for region in selection_regions
    }
    bgerrs[shape] = {}

    for region in selection_regions:
        h = hists[shape][region]
        templates = file[f"{region}_{shape}"]

        # Samples mapped to a card name: copy their shapes from the fit output.
        # A mapped sample that is absent from this region's fit output is
        # reported and left at its initial (zero) content; it is NOT back-filled
        # from the pre-fit templates.
        for key, file_key in hist_label_map_inverse.items():
            if key == data_key:
                continue  # data is handled separately below

            if file_key not in templates:
                print(f"No {key} in {region}")
                continue

            _fill_sample(h, key, templates[file_key].values())

        # Samples with no entry in hist_label_map_inverse: take them from the
        # pre-fit templates instead (backgrounds first, then signals).
        for pre_templates, keys in ((bg_pre_templates, pbg_keys), (sig_pre_templates, sig_keys)):
            for key in keys:
                if key not in hist_label_map_inverse:
                    _fill_sample(h, key, pre_templates[region][key, ...].values())

        # Data: NaN entries in the fit output are replaced via np.nan_to_num.
        _fill_sample(
            h,
            data_key,
            np.nan_to_num(templates[hist_label_map_inverse[data_key]].values()),
        )

        bgerrs[shape][region] = templates["TotalBkg"].errors()

prefit
No ST in passvbf
No ST in passggf
postfit
No ST in passvbf
No ST in passggf


In [9]:
# When running blinded, blind the data in bins [100, 150] of every region
# except "fail", for each set of fit shapes.
if not unblinded:
    for region_hists in hists.values():
        for region_name, region_hist in region_hists.items():
            if region_name == "fail":
                continue
            utils.blindBins(region_hist, [100, 150], data_key, axis=0)

In [19]:
# Upper y-axis limit of the main panel, per region.
ylims = {"passggf": 60, "passvbf": 11, "fail": 7e5}
title_label = " Asimov Dataset" if asimov else ""

# Factor each signal is scaled by in the pass-region plots; restricted to the
# signals that are actually in sig_keys.
sig_scale_dict = {
    "HHbbVV": 100,
    "VBFHHbbVV": 2000,
    "qqHH_CV_1_C2V_0_kl_1_HHbbVV": 1,
    "qqHH_CV_1_C2V_2_kl_1_HHbbVV": 1,
}
sig_scale_dict = {key: val for key, val in sig_scale_dict.items() if key in sig_keys}

# (CMS label text, output sub-directory) for each version of the plots. The
# directories created and the ones written to both come from this one table.
plot_variants = [("Preliminary", "preliminary"), ("", "final")]

for _, pplotdir in plot_variants:
    (plot_dir / pplotdir).mkdir(exist_ok=True, parents=True)

for plabel, pplotdir in plot_variants:
    for shape, shape_label in shapes.items():
        # if shape != "postfit":
        #     continue
        for region, region_label in selection_regions.items():
            pass_region = region.startswith("pass")
            for shape_var in shape_vars:
                plot_params = {
                    "hists": hists[shape][region],
                    "sig_keys": sig_keys,
                    "bg_keys": pbg_keys,
                    "bg_err": bgerrs[shape][region],
                    "resonant": False,
                    # signals are only scaled in the pass regions
                    "sig_scale_dict": sig_scale_dict if pass_region else None,
                    "show": False,
                    "year": "all",
                    "ylim": ylims[region],
                    # "title": f"{shape_label} {region_label} Region{title_label}",
                    "region_label": f"{region_label} Region",
                    "name": f"{plot_dir}/{pplotdir}/{shape}_{region}_{shape_var.var}.pdf",
                    "ratio_ylims": [0, 10] if region == "passvbf" else [0, 5],
                    "cmslabel": plabel,
                    "leg_args": {"fontsize": 22, "ncol": 2},
                }

                plotting.ratioHistPlot(**plot_params, data_err=True)

## QCD Transfer Factor

In [None]:
# `mticker` (matplotlib.ticker) and `hep` (mplhep) are imported in the
# notebook's top import cell together with every other dependency.

# Apply the CMS plotting style through both matplotlib and mplhep.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Scalar tick formatter using math text, with power limits (-3, 3).
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

In [None]:
# Upper y-axis limit of the transfer-factor plot, per pass region. Deliberately
# NOT called `ylims`: the fit-shape plotting cell defines a global of that name
# with a "fail" entry, and overwriting it here would make that cell fail with a
# KeyError when re-run after this one.
tf_ylims = {"passggf": 1e-4, "passvbf": 1e-5}

# Post-fit QCD transfer factor (pass-region QCD / fail-region QCD), per pass region.
tfs = {}

for region, region_label in selection_regions.items():
    if region == "fail":
        continue

    tf = hists["postfit"][region]["QCD", ...] / hists["postfit"]["fail"]["QCD", ...]
    tfs[region] = tf

    hep.histplot(tf)
    plt.title(f"{region_label} Region")
    plt.ylabel("QCD Transfer Factor")
    plt.xlim([50, 250])
    plt.ylim([0, tf_ylims[region]])
    plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
    plt.savefig(f"{plot_dir}/{region}_QCDTF.pdf", bbox_inches="tight")
    plt.show()

In [None]:
# Straight line through the first and last bins of the VBF transfer factor.
tf = tfs["passvbf"]
tf_values = tf.view()

# x positions assigned to the first and last bins.
# NOTE(review): presumably the centres of those bins - confirm against the
# binning of the shape variable.
x_first, x_last = 55, 245

slope = (tf_values[-1] - tf_values[0]) / (x_last - x_first)
yint = tf_values[0] - slope * x_first
print(slope, yint)