In [None]:
from collections import OrderedDict

import uproot
import numpy as np
import matplotlib.pyplot as plt
import pickle

import hist
from hist import Hist

import plotting
from HHbbVV.hh_vars import data_key, years, bg_keys, nonres_sig_keys
from postprocessing import nonres_shape_vars, get_nonres_selection_regions
from datacardHelpers import sum_templates
from HHbbVV.postprocessing import utils

from pathlib import Path

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Base directory that output paths in this notebook are resolved against.
MAIN_DIR = Path("../../../")

# Only referenced by the commented-out f-test cards path in this notebook;
# presumably the transfer-factor order -- TODO confirm.
nTF = 0

# Region / signal toggles.
vbf_only = False  # True removes the "passggf" region from the plots
ggf_only = False  # True removes the "passvbf" region from the plots
k2v0sig = False  # only referenced by a commented-out signal selection

# When False, the data in the non-fail regions is blinded before plotting.
unblinded = True

# Directory that receives every figure saved by this notebook.
plot_dir = MAIN_DIR / "plots/PostFit/24Jul18Fig8Prelim"
# Alternative output location (ggF scan):
# plot_dir = (
#     MAIN_DIR
#     / "plots/PostFit/24Apr9ggFScan/nTF1/ggf_txbb_MP_ggf_bdt_0.9965_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb"
# )

# Create the directory (and any missing parents); a no-op if it already exists.
plot_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Sub-directory of the cards area whose fit shapes are plotted.
cards_dir = "24Apr26NonresBDT995/ggf-sig-only"
# cards_dir = f"f_tests/24Apr26NonresBDT995/ggf/nTF_{nTF}"

# When True, the "FitShapesBAsimov.root" file is read instead of "FitShapesB.root".
asimov = False
if asimov:
    asimov_label = "Asimov"
else:
    asimov_label = ""

# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
fit_shapes_path = f"/uscms/home/rkansal/hhcombine/cards/{cards_dir}/FitShapesB{asimov_label}.root"
# fit_shapes_path = f"/uscms/home/rkansal/eos/bbVV/cards/{cards_dir}/FitShapes.root"

# Kept open for the rest of the notebook; the histogram-filling cell reads from it.
file = uproot.open(fit_shapes_path)

In [None]:
# Directory holding one "<year>_templates.pkl" file per data-taking year.
# templates_dir = Path(f"templates/{cards_dir}")
templates_dir = Path("templates/24Apr26NonresBDT995AllSigs")
# templates_dir = Path(
#     f"templates/24Apr9ggFScan/ggf_txbb_HP_ggf_bdt_0.996_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb"
# )

# Per-year templates, keyed by year.
templates_dict = {}
for year in years:
    pkl_path = templates_dir / f"{year}_templates.pkl"
    # NOTE: pickle.load can execute arbitrary code -- only read trusted template files.
    with pkl_path.open("rb") as pkl_file:
        templates_dict[year] = pickle.load(pkl_file)

# Templates combined over `years` (presumably summed -- see datacardHelpers.sum_templates).
pre_templates = sum_templates(templates_dict, years)

In [None]:
# Name under which the data histogram is stored in the fit-shapes file.
workspace_data_key = "data_obs"  # if not asimov else "toy_asimov"

# Ordered mapping: name in templates -> name in cards.
hist_label_map_inverse = OrderedDict()
hist_label_map_inverse["QCD"] = "CMS_bbWW_hadronic_qcd_datadriven"
# hist_label_map_inverse["Diboson"] = "diboson"
hist_label_map_inverse["TT"] = "ttbar"
hist_label_map_inverse["ST"] = "singletop"
hist_label_map_inverse["Z+Jets"] = "zjets"
hist_label_map_inverse["W+Jets"] = "wjets"
# hist_label_map_inverse["HHbbVV"] = "ggHH_kl_1_kt_1_hbbhww4q"
# hist_label_map_inverse["VBFHHbbVV"] = "qqHH_CV_1_C2V_1_kl_1_HHbbww"  # change to hbbhww future
hist_label_map_inverse[data_key] = workspace_data_key

# Reverse lookup: name in cards -> name in templates.
hist_label_map = dict(zip(hist_label_map_inverse.values(), hist_label_map_inverse.keys()))

# Signal samples to draw.
sig_keys = [
    "HHbbVV",
    "VBFHHbbVV",
    "qqHH_CV_1_C2V_0_kl_1_HHbbVV",
    "qqHH_CV_1_C2V_2_kl_1_HHbbVV",
]
# sig_keys = ["qqHH_CV_1_C2V_0_kl_1_HHbbVV"] if k2v0sig else ["HHbbVV"]

# Full list of categories on the "Sample" axis: backgrounds, signals, then data.
samples = [*bg_keys, *sig_keys, data_key]

In [None]:
# Fit-shape suffix in the fit-shapes file -> label used in plot titles.
shapes = {
    "prefit": "Pre-Fit",
    # "shapes_fit_s": "S+B Post-Fit",
    "postfit": "B-only Post-Fit",
}

# if unblinded:
#     shapes["postfit"] = "Post-Fit"

# Variables the templates are binned in.
shape_vars = nonres_shape_vars

# Regions dropped according to the vbf_only / ggf_only toggles.
excluded_regions = set()
if vbf_only:
    excluded_regions.add("passggf")
if ggf_only:
    excluded_regions.add("passvbf")

# Region name in the fit-shapes file -> label used in plot titles.
selection_regions = {
    region: label
    for region, label in (("passvbf", "VBF"), ("passggf", "ggF"), ("fail", "Fail"))
    if region not in excluded_regions
}

In [None]:
def _sample_index(histogram, sample):
    """Return the position of ``sample`` along the first axis of ``histogram``."""
    return np.where(np.array(list(histogram.axes[0])) == sample)[0][0]


# hists[shape][region]  -> histogram with a "Sample" axis plus the shape-variable axes
# bgerrs[shape][region] -> errors of the "TotalBkg" histogram from the fit-shapes file
hists = {}
bgerrs = {}

for shape in shapes:
    print(shape)
    hists[shape] = {}
    bgerrs[shape] = {}

    # Start from an empty histogram for every region.
    for region in selection_regions:
        hists[shape][region] = Hist(
            hist.axis.StrCategory(samples, name="Sample"),
            *(shape_var.axis for shape_var in shape_vars),
            storage="double",
        )

    for region in selection_regions:
        region_hist = hists[shape][region]
        fit_shapes = file[f"{region}_{shape}"]

        # Samples that appear in the cards: copy their shape from the fit-shapes file.
        for sample, card_name in hist_label_map_inverse.items():
            if sample == data_key:
                # Data is filled separately below.
                continue
            if card_name not in fit_shapes:
                print(f"No {sample} in {region}")
                continue

            row = _sample_index(region_hist, sample)
            region_hist.view(flow=False)[row, :] = fit_shapes[card_name].values()

        # Samples without a cards entry: fall back to the templates loaded from disk.
        for sample in bg_keys + sig_keys:
            if sample in hist_label_map_inverse:
                continue
            row = _sample_index(region_hist, sample)
            region_hist.view(flow=False)[row, :] = pre_templates[region][sample, ...].values()

        # Data, with NaNs replaced by zeros.
        data_row = _sample_index(region_hist, data_key)
        region_hist.view(flow=False)[data_row, :] = np.nan_to_num(
            fit_shapes[hist_label_map_inverse[data_key]].values()
        )

        bgerrs[shape][region] = fit_shapes["TotalBkg"].errors()

In [None]:
# While blinded, hide the data in the [100, 150] window of every region except
# "fail" (see utils.blindBins for the exact treatment).
if not unblinded:
    for region_hists in hists.values():
        for region, region_hist in region_hists.items():
            if region == "fail":
                continue
            utils.blindBins(region_hist, [100, 150], data_key, axis=0)

In [None]:
# Upper y-axis limit of the main panel, per region.
ylims = {"passggf": 60, "passvbf": 10, "fail": 7e5}
title_label = " Asimov Dataset" if asimov else ""

# Factors by which each signal is scaled when drawn; restricted to the signals in use.
all_sig_scales = {
    "HHbbVV": 100,
    "VBFHHbbVV": 2000,
    "qqHH_CV_1_C2V_0_kl_1_HHbbVV": 1,
    "qqHH_CV_1_C2V_2_kl_1_HHbbVV": 1,
}
sig_scale_dict = {
    sig_key: scale for sig_key, scale in all_sig_scales.items() if sig_key in sig_keys
}

# One ratio plot per (fit shape, region, shape variable), saved as a PDF in plot_dir.
for shape, shape_label in shapes.items():
    for region, region_label in selection_regions.items():
        pass_region = region.startswith("pass")
        # The VBF pass region gets a wider ratio-panel range than the others.
        ratio_ylims = [0, 5] if region == "passvbf" else [0, 2]

        for shape_var in shape_vars:
            plotting.ratioHistPlot(
                hists=hists[shape][region],
                sig_keys=sig_keys,
                bg_keys=bg_keys,
                bg_err=bgerrs[shape][region],
                # Signal scale factors are only passed for the pass regions.
                sig_scale_dict=sig_scale_dict if pass_region else None,
                show=True,
                year="all",
                ylim=ylims[region],
                title=f"{shape_label} {region_label} Region{title_label}",
                name=f"{plot_dir}/{shape}_{region}_{shape_var.var}.pdf",
                ncol=2,  # if region == "passvbf" else 1,
                ratio_ylims=ratio_ylims,
                cmslabel="Preliminary",
                data_err=True,
            )

## QCD Transfer Factor

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import mplhep as hep

# Apply the mplhep CMS style. The original cell applied it twice
# (plt.style.use(hep.style.CMS) followed by hep.style.use("CMS")); both calls
# select the same style, so a single call is kept.
hep.style.use("CMS")

# Scalar tick formatter that switches to 10^n math-text notation outside 1e-3..1e3.
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

In [None]:
# Upper y-axis limit of the transfer-factor plot, per pass region.
ylims = {"passggf": 1e-4, "passvbf": 1e-5}

# Post-fit QCD pass/fail ratio for every pass region, keyed by region name.
tfs = {}

postfit_hists = hists["postfit"]
for region, region_label in selection_regions.items():
    if region != "fail":
        # Transfer factor: post-fit QCD in this region divided by post-fit QCD in "fail".
        qcd_ratio = postfit_hists[region]["QCD", ...] / postfit_hists["fail"]["QCD", ...]
        tfs[region] = qcd_ratio

        hep.histplot(qcd_ratio)
        plt.title(f"{region_label} Region")
        plt.ylabel("QCD Transfer Factor")
        plt.xlim(50, 250)
        plt.ylim(0, ylims[region])
        plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
        plt.savefig(f"{plot_dir}/{region}_QCDTF.pdf", bbox_inches="tight")
        plt.show()

In [None]:
# Straight line through the first and last bins of the VBF transfer factor.
tf = tfs["passvbf"]
tf_values = tf.view()

# x-positions assigned to the first and last bins
# (presumably their bin centres -- TODO confirm against the shape-variable binning).
x_first, x_last = 55, 245

slope = (tf_values[-1] - tf_values[0]) / (x_last - x_first)
yint = tf_values[0] - slope * x_first
print(slope, yint)