In [1]:
import utils
import plotting
import postprocessing
import corrections

from utils import CUT_MAX_VAL, ShapeVar
from HHbbVV.hh_vars import (
    years,
    data_key,
    qcd_key,
    bg_keys,
    samples,
    nonres_sig_keys,
    nonres_samples,
    txbb_wps,
    jec_shifts,
    jmsr_shifts,
    LUMI,
)
from postprocessing import res_shape_vars, load_filters

from collections import OrderedDict

import numpy as np
import pandas as pd
import pickle, json
import hist
from hist import Hist

import os
from pathlib import Path
from copy import deepcopy
from inspect import cleandoc
import warnings

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution and edits to the local modules
# imported above are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Base directory (relative to the notebook's working directory) that the
# input-sample and plot paths below are resolved against.
MAIN_DIR = Path("../../../")
samples_dir = MAIN_DIR / "../data/skimmer/24Mar5AllYears"
# samples_dir = "/ceph/cms/store/user/annava/projects/HHbbVV/24Mar5AllYears"
year = "2016"

# Tag used to name this run's output directories.
date = "24Mar6"
plot_dir = MAIN_DIR / f"plots/PostProcessing/{date}/"
templates_dir = Path(f"templates/{date}/")

# Create every output directory up front. pathlib is used instead of shelling
# out to `mkdir -p`, so paths containing spaces or shell metacharacters are
# handled correctly and a failure (e.g. a permission error) raises instead of
# being silently ignored.
for out_dir in (
    plot_dir / "ControlPlots" / year,
    plot_dir / "cutflows",
    plot_dir / "templates" / "wshifts",
    plot_dir / "templates" / "jshifts",
    plot_dir / "templates" / "hists2d",
    templates_dir / "cutflows" / year,
):
    out_dir.mkdir(parents=True, exist_ok=True)

Load samples

In [None]:
# Systematics container, keyed by year; starts out empty for the selected year.
systematics = {year: {}}

# Load the nonresonant-signal and regular samples for `year` from `samples_dir`,
# applying `load_filters`.
events_dict = postprocessing.load_samples(
    samples_dir, {**nonres_samples, **samples}, year, load_filters
)

# Cutflow is kept as a pandas table with one row per sample key
# (regular samples first, then the nonresonant signal samples).
cutflow = pd.DataFrame(index=[*samples.keys(), *nonres_samples.keys()])
utils.add_to_cutflow(events_dict, "Preselection", "finalWeight", cutflow)

# Display the cutflow table as the cell output.
cutflow

Scale factors and bb VV assignment

In [None]:
# NOTE(review): the calls below share `events_dict` / `cutflow` and are kept in
# this exact order; what each helper mutates is defined in `postprocessing`.

# Presumably derives/applies the QCD scale factor and records it in `cutflow`
# -- confirm against postprocessing.qcd_sf.
postprocessing.qcd_sf(events_dict, cutflow)
# Per-sample masks returned by the bb / VV assignment step; reused by the
# control-plot cell further down.
bb_masks = postprocessing.bb_VV_assignment(events_dict)
# Derived variables are computed with the nonresonant-specific ones switched off.
postprocessing.derive_variables(events_dict, bb_masks, nonres_vars=False)
# Display the cutflow table as the cell output.
cutflow

Control Plots

In [None]:
# List of ShapeVar objects describing the variables to plot; each entry gives
# the variable name (`var`), the axis label (`label`) and the binning (`bins`),
# plus an optional `significance_dir`.
# `bins` is presumably [number of bins, low edge, high edge] -- confirm against utils.ShapeVar.
# Commented-out entries are variables that can be switched back on by uncommenting them.
control_plot_vars = [
    # ShapeVar(var="MET_pt", label=r"$p^{miss}_T$ (GeV)", bins=[50, 0, 300]),
    # ShapeVar(var="DijetEta", label=r"$\eta^{jj}$", bins=[30, -8, 8]),
    # ShapeVar(var="DijetPt", label=r"$p_T^{jj}$ (GeV)", bins=[30, 0, 750]),
    # ShapeVar(var="DijetMass", label=r"$m^{jj}$ (GeV)", bins=[30, 600, 4000]),
    # ShapeVar(var="bbFatJetEta", label=r"$\eta^{bb}$", bins=[20, -2.4, 2.4]),
    # ShapeVar(
    #     var="bbFatJetPt", label=r"$p^{bb}_T$ (GeV)", bins=[20, 300, 2300], significance_dir="right"
    # ),
    ShapeVar(
        var="bbFatJetParticleNetMass",
        label=r"$m^{bb}_{reg}$ (GeV)",
        bins=[20, 50, 250],
        significance_dir="bin",
    ),
    ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[20, 0, 300]),
    # ShapeVar(var="bbFatJetParticleNetMD_Txbb", label=r"$T^{bb}_{Xbb}$", bins=[50, 0.8, 1]),
    # ShapeVar(var="VVFatJetEta", label=r"$\eta^{VV}$", bins=[30, -2.4, 2.4]),
    # ShapeVar(var="VVFatJetPt", label=r"$p^{VV}_T$ (GeV)", bins=[20, 300, 2300]),
    # ShapeVar(var="VVFatJetParticleNetMass", label=r"$m^{VV}_{reg}$ (GeV)", bins=[20, 50, 250]),
    # ShapeVar(var="VVFatJetMsd", label=r"$m^{VV}_{msd}$ (GeV)", bins=[40, 50, 250]),
    # ShapeVar(var="VVFatJetParticleNet_Th4q", label=r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)", bins=[50, 0, 1]),
    # ShapeVar(var="VVFatJetParTMD_THWW4q", label=r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)", bins=[50, 0, 1]),
    # ShapeVar(var="VVFatJetParTMD_probT", label=r"Prob(Top) (Mass-Decorrelated)", bins=[50, 0, 1]),
    # ShapeVar(var="VVFatJetParTMD_THWWvsT", label=r"$T^{VV}_{HWW}$", bins=[50, 0, 1]),
    # ShapeVar(var="bbFatJetPtOverDijetPt", label=r"$p^{bb}_T / p_T^{jj}$", bins=[50, 0, 40]),
    # ShapeVar(var="VVFatJetPtOverDijetPt", label=r"$p^{VV}_T / p_T^{jj}$", bins=[50, 0, 40]),
    # ShapeVar(var="VVFatJetPtOverbbFatJetPt", label=r"$p^{VV}_T / p^{bb}_T$", bins=[50, 0.4, 2.0]),
    # ShapeVar(var="nGoodMuonsHbb", label=r"# of Muons", bins=[3, 0, 3]),
    # ShapeVar(var="nGoodMuonsHH", label=r"# of Muons", bins=[3, 0, 3]),
    # ShapeVar(var="nGoodElectronsHbb", label=r"# of Electrons", bins=[3, 0, 3]),
    # ShapeVar(var="nGoodElectronsHH", label=r"# of Electrons", bins=[3, 0, 3]),
    # ShapeVar(var="nGoodJets", label=r"# of AK4 B-Jets", bins=[5, 0, 5]),
    # removed if not ggF nonresonant - needs to be the last variable!
    # ShapeVar(var="BDTScore", label=r"BDT Score", bins=[50, 0, 1]),
]

# Produce the control plots for the variables in `control_plot_vars`, writing
# into this year's ControlPlots directory, and keep the returned object in `hists`.
hists = postprocessing.control_plots(
    events_dict,
    bb_masks,
    postprocessing.plot_sig_keys_nonres,
    control_plot_vars,
    plot_dir / f"ControlPlots/{year}",
    year,
    bg_keys=bg_keys,
    # Per-signal numeric factors keyed by signal sample name; presumably used to
    # scale the signals up for visibility -- confirm against
    # postprocessing.control_plots.
    sig_scale_dict={
        "HHbbVV": 1e5,
        "VBFHHbbVV": 2e5,
        "qqHH_CV_1_C2V_0_kl_1_HHbbVV": 2e3,
        "qqHH_CV_1_C2V_2_kl_1_HHbbVV": 2e3,
    },  # this comma was missing, which made the whole cell a SyntaxError
    show=True,
)