In [None]:
import utils
import plotting
import postprocessing
import corrections

from utils import CUT_MAX_VAL
from hh_vars import (
    years,
    sig_key,
    data_key,
    qcd_key,
    bg_keys,
    samples,
    txbb_wps,
    jec_shifts,
    jmsr_shifts,
)
from postprocessing import (
    shape_var,
    shape_bins,
    blind_window,
    selection_regions,
    selection_regions_label,
    # selection_regions_year,
)

import numpy as np
import pandas as pd
import pickle
from pandas.errors import SettingWithCopyWarning
from hist import Hist

import os
from copy import deepcopy
from inspect import cleandoc
import warnings

import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker

# Plot styling: select the CMS style through both matplotlib and mplhep.
plt.style.use(hep.style.CMS)
hep.style.use("CMS")

# Scalar tick formatter with math-text exponents and power limits of (-3, 3).
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))

# Default font size for every figure produced below.
plt.rcParams["font.size"] = 16

# SettingWithCopyWarning is silenced notebook-wide; it does not seem to apply here.
warnings.simplefilter("ignore", category=SettingWithCopyWarning)

In [None]:
# Enable IPython's autoreload extension. Per the IPython docs, mode 2 reloads all
# imported modules before executing code, so edits to the imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Input locations, relative to the notebook's working directory. Signal and
# background skims are configured separately (currently the same directory).
MAIN_DIR = "../../../"
samples_dir = f"{MAIN_DIR}/../data/skimmer/Feb24"
signal_samples_dir = f"{MAIN_DIR}/../data/skimmer/Feb24"
year = "2018"

# Output location for every plot written by this notebook.
plot_dir = "../../../plots/TTbarCheck/Mar2"
# templates_dir = "templates/Feb28/"

# Create the output directories up front. os.makedirs(..., exist_ok=True) is the
# in-process equivalent of `mkdir -p`: no shell is spawned, it works on every
# platform, and a failure raises instead of being dropped as an ignored exit status.
for out_dir in (plot_dir, f"{plot_dir}/cutflows/", f"{plot_dir}/templates/"):
    os.makedirs(out_dir, exist_ok=True)
# os.makedirs(templates_dir, exist_ok=True)

Load samples

In [None]:
# Pre-selection filters defined in postprocessing; handed to the loader below.
filters = postprocessing.filters
# Filled by the templates loop further down in the notebook.
systematics = {}

# save cutflow as pandas table, one row per sample key
cutflow = pd.DataFrame(index=list(samples.keys()))

# utils.remove_empty_parquets(samples_dir, year)

# Signal is loaded from its own directory, then every other sample is merged in.
events_dict = utils.load_samples(signal_samples_dir, {sig_key: samples[sig_key]}, year, filters)
# The non-signal mapping is built by filtering `samples.items()` so it keeps the
# order of `samples`. The previous `samples.keys() - [sig_key]` produced a set,
# whose iteration order is arbitrary and can differ between kernel sessions.
events_dict |= utils.load_samples(
    samples_dir,
    {key: val for key, val in samples.items() if key != sig_key},
    year,
    filters,
)

# Record the yields after the skimmer pre-selection as the first cutflow column.
utils.add_to_cutflow(events_dict, "BDTPreselection", "weight", cutflow)

print("")
# print weighted sample yields
for sample in events_dict:
    tot_weight = np.sum(events_dict[sample]["weight"].values)
    print(f"Pre-selection {sample} yield: {tot_weight:.2f}")

Scale factors and bb VV assignment

In [None]:
# Apply the weights / scale factors for `year`. The return value is discarded, so this
# presumably updates events_dict and cutflow in place — TODO confirm in postprocessing.
postprocessing.apply_weights(events_dict, year, cutflow)
# Masks from the bb / VV jet assignment; later cells index them per sample key and
# pass them alongside the events to the selection, plotting and template helpers.
bb_masks = postprocessing.bb_VV_assignment(events_dict)
# events_dict[sig_key] = postprocessing.postprocess_lpsfs(events_dict[sig_key])
# Bare expression: show the cutflow table as the cell output.
cutflow

In [None]:
# Load the BDT predictions stored under the skim directory for every sample key,
# requesting the JEC / JMSR shifted variants as well.
bdt_preds_dir = f"{samples_dir}/inferences/"
bdt_sample_keys = list(samples)

postprocessing.load_bdt_preds(
    events_dict,
    year,
    bdt_preds_dir,
    bdt_sample_keys,
    jec_jmsr_shifts=True,
)

In [None]:
# Mass-decorrelated ParT scores of the VV fat jet, plotted before any selection.
# Maps the column suffix after "VVFatJetParTMD_prob" to the class name shown in the label.
part_md_classes = {
    "T": "Top",
    "QCD": "QCD",
    "HWW3q": "HWW3q",
    "HWW4q": "HWW4q",
}

# {var: (bins, label)}
control_plot_vars = {
    f"VVFatJetParTMD_prob{suffix}": ([50, 0, 1], rf"Prob({class_name}) (Mass-Decorrelated)")
    for suffix, class_name in part_md_classes.items()
}

postprocessing.control_plots(events_dict, bb_masks, control_plot_vars, plot_dir, year, show=True)

In [None]:
# Selection regions used for the top-enriched checks in this cell.
selection_regions = {
    "vv_top_cut": {"VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL]},
    # "vv_top_mass_cut": {
    #     "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL]
    # },
    "vv_top_bdt_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.8, CUT_MAX_VAL],
    },
}

# Select events passing the P(Top) + BDT region and restrict the events and the
# bb masks of every sample to them.
top_bdt_region = selection_regions["vv_top_bdt_cut"]
sel, cf = utils.make_selection(top_bdt_region, events_dict, bb_masks)
sel_dict = {key: events_dict[key][sel[key]] for key in events_dict}
sel_bb_masks = {key: bb_masks[key][sel[key]] for key in bb_masks}

# Only the fat-jet mass variables are plotted after this selection, all with the
# same [20, 50, 250] binning.
mass_plot_labels = [
    ("bbFatJetParticleNetMass", r"$m^{bb}_{reg}$ (GeV)"),
    ("bbFatJetMsd", r"$m^{bb}_{msd}$ (GeV)"),
    ("VVFatJetMsd", r"$m^{VV}_{msd}$ (GeV)"),
    ("VVFatJetParticleNetMass", r"$m^{VV}_{reg}$ (GeV)"),
]

# {var: (bins, label)}
control_plot_vars = {var: ([20, 50, 250], label) for var, label in mass_plot_labels}

# Currently disabled variables, kept for reference:
#     "VVFatJetParTMD_probT": ([50, 0.9, 1], r"Prob(Top) (Mass-Decorrelated)"),
#     "VVFatJetParTMD_probQCD": ([50, 0, 0.1], r"Prob(QCD) (Mass-Decorrelated)"),
#     "VVFatJetParTMD_probHWW3q": ([50, 0, 0.1], r"Prob(HWW3q) (Mass-Decorrelated)"),
#     "VVFatJetParTMD_probHWW4q": ([50, 0, 0.1], r"Prob(HWW4q) (Mass-Decorrelated)"),
#     "MET_pt": ([50, 0, 250], r"$p^{miss}_T$ (GeV)"),
#     "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"),
#     "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"),
#     "DijetMass": ([50, 500, 3000], r"$m^{jj}$ (GeV)"),
#     "bbFatJetEta": ([50, -2.4, 2.4], r"$\eta^{bb}$"),
#     "bbFatJetPt": ([50, 300, 1300], r"$p^{bb}_T$ (GeV)"),
#     "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$p^{bb}_{Txbb}$"),
#     "VVFatJetEta": ([50, -2.4, 2.4], r"$\eta^{VV}$"),
#     "VVFatJetPt": ([50, 300, 1300], r"$p^{VV}_T$ (GeV)"),

# Plot file names get the "topbdtcut_" tag passed as cutstr.
postprocessing.control_plots(
    sel_dict, sel_bb_masks, control_plot_vars, plot_dir, year, cutstr="topbdtcut_", show=True
)

In [None]:
# NOTE(review): the P(Top) threshold in "vv_top_bdt_cut" is 0.8 here, while
# "vv_top_cut" in the same dict uses 0.9 — confirm the looser cut is intended.
top_bdt_cuts = {
    "VVFatJetParTMD_probT": [0.8, CUT_MAX_VAL],
    "BDTScore": [0.8, CUT_MAX_VAL],
}
selection_regions = {
    "vv_top_cut": {"VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL]},
    "vv_top_bdt_cut": top_bdt_cuts,
}

# Per-sample selection for the P(Top) + BDT region, applied to both the events
# and the bb masks.
sel, cf = utils.make_selection(top_bdt_cuts, events_dict, bb_masks)
sel_dict = {key: events_dict[key][sel[key]] for key in events_dict}
sel_bb_masks = {key: bb_masks[key][sel[key]] for key in bb_masks}

# {var: (bins, label)} — only the soft-drop mass of the bb jet is plotted here.
control_plot_vars = {"bbFatJetMsd": ([50, 0, 300], r"$mSD^{bb}$ (GeV)")}

postprocessing.control_plots(sel_dict, sel_bb_masks, control_plot_vars, plot_dir, year, show=True)

Templates

In [None]:
# Regions for which templates are produced: {region name: {variable: [lower, upper]}}.
# CUT_MAX_VAL (or its negative) is used where a cut has no bound on that side.
# Each key appears exactly once: a repeated key in a dict literal silently replaces
# the earlier entry, so the duplicated "vv_top_bdt8_cut" entry has been dropped.
selection_regions = {
    "vv_top_cut": {"VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL]},
    "vv_top_hww_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "VVFatJetParTMD_THWW4q": [0.9, CUT_MAX_VAL],
    },
    # Scan of the BDT threshold on top of the P(Top) > 0.9 requirement.
    "vv_top_bdt5_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.5, CUT_MAX_VAL],
    },
    "vv_top_bdt6_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.6, CUT_MAX_VAL],
    },
    "vv_top_bdt7_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.7, CUT_MAX_VAL],
    },
    "vv_top_bdt8_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.8, CUT_MAX_VAL],
    },
    "vv_top_bdt85_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "BDTScore": [0.85, CUT_MAX_VAL],
    },
    "vv_top_hbb_cut": {
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
        "bbFatJetParticleNetMD_Txbb": [0.98, CUT_MAX_VAL],
    },
    # BDT + Txbb cuts with the VV jet required to have P(Top) below 0.9.
    "pass_top_veto": {
        "BDTScore": [0.99, CUT_MAX_VAL],
        "bbFatJetParticleNetMD_Txbb": [0.988, CUT_MAX_VAL],
        "VVFatJetParTMD_probT": [-CUT_MAX_VAL, 0.9],
    },
    "TxbbCut": {
        "bbFatJetParticleNetMD_Txbb": [0.988, CUT_MAX_VAL],
    },
    "BDTCut": {
        "BDTScore": [0.99, CUT_MAX_VAL],
    },
    # Same BDT + Txbb cuts, with the VV jet required to have P(Top) above 0.9.
    "top_cut_pass": {
        "BDTScore": [0.99, CUT_MAX_VAL],
        "bbFatJetParticleNetMD_Txbb": [0.988, CUT_MAX_VAL],
        "VVFatJetParTMD_probT": [0.9, CUT_MAX_VAL],
    },
}

# Label for each region name. This dict also carries labels for names that are
# not defined in selection_regions above ("pass", "fail", "BDTOnly", "vv_top_bdt_cut",
# "vv_top_bdt9_cut").
# NOTE(review): "vv_top_bdt9_cut" has a label but no region — add a BDT > 0.9 region
# if that scan point was intended.
selection_regions_label = {
    "pass": "Pass",
    "pass_top_veto": "Pass & P(Top)$^{VV}$ < 0.9",
    "top_cut_pass": "Pass & P(Top)$^{VV}$ > 0.9",
    "fail": "Fail",
    "BDTOnly": "BDT Cut",
    "TxbbCut": "Txbb Cut Only",
    "BDTCut": "BDT Cut Only",
    "vv_top_cut": r"P(Top)$^{VV}$ > 0.9",
    "vv_top_hww_cut": r"P(Top) & P(HWW) Cut",
    "vv_top_bdt_cut": r"P(Top) & BDT Cut",
    "vv_top_bdt5_cut": r"P(Top) & BDT > 0.5 Cut",
    "vv_top_bdt6_cut": r"P(Top) & BDT > 0.6 Cut",
    "vv_top_bdt7_cut": r"P(Top) & BDT > 0.7 Cut",
    "vv_top_bdt8_cut": r"P(Top) & BDT > 0.8 Cut",
    "vv_top_bdt85_cut": r"P(Top) & BDT > 0.85 Cut",
    "vv_top_bdt9_cut": r"P(Top) & BDT > 0.9 Cut",
    "vv_top_hbb_cut": r"P(Top) & P(Hbb)$^{bb}$ Cut",
}

# Nominal templates only: no weight shifts and no JEC/JMSR shift.
# The call's return value is left as the cell output.
postprocessing.get_templates(
    events_dict,
    bb_masks,
    year,
    selection_regions,
    shape_var,
    shape_bins,
    blind_window,
    plot_dir=plot_dir,
    prev_cutflow=cutflow,
    weight_shifts={},
    jshift="",
    selection_regions_label=selection_regions_label,
    show=True,
)

In [None]:
# Templates for the nominal case ("") and for every JEC / JMSR shift, merged into
# one dict; the systematics returned alongside are accumulated in `systematics`.
templates = {}

for jshift in [""] + jec_shifts + jmsr_shifts:
    print(jshift)
    # `selection_regions` is the notebook-level dict keyed by region name, so it is
    # passed as-is; indexing it with `year` raises KeyError because it has no
    # per-year level. The region labels are forwarded as well, matching the
    # nominal get_templates call made with the same regions.
    ttemps, tsyst = postprocessing.get_templates(
        events_dict,
        bb_masks,
        year,
        selection_regions,
        shape_var,
        shape_bins,
        blind_window,
        plot_dir=plot_dir,
        prev_cutflow=cutflow,
        weight_shifts=postprocessing.weight_shifts,
        jshift=jshift,
        selection_regions_label=selection_regions_label,
        show=False,
    )

    # Later shifts overwrite earlier entries that share a key.
    templates.update(ttemps)
    systematics.update(tsyst)